diff --git a/.clang-tidy b/.clang-tidy
index e2f318562ec..219ac263ab3 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -22,6 +22,7 @@ Checks: [
'-bugprone-exception-escape',
'-bugprone-forward-declaration-namespace',
'-bugprone-implicit-widening-of-multiplication-result',
+ '-bugprone-multi-level-implicit-pointer-conversion',
'-bugprone-narrowing-conversions',
'-bugprone-not-null-terminated-result',
'-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
@@ -98,6 +99,7 @@ Checks: [
'-modernize-use-nodiscard',
'-modernize-use-trailing-return-type',
+ '-performance-enum-size',
'-performance-inefficient-string-concatenation',
'-performance-no-int-to-ptr',
'-performance-avoid-endl',
@@ -105,6 +107,7 @@ Checks: [
'-portability-simd-intrinsics',
+ '-readability-avoid-nested-conditional-operator',
'-readability-avoid-unconditional-preprocessor-if',
'-readability-braces-around-statements',
'-readability-convert-member-functions-to-static',
@@ -118,6 +121,12 @@ Checks: [
'-readability-magic-numbers',
'-readability-named-parameter',
'-readability-redundant-declaration',
+ '-readability-redundant-inline-specifier',
+    '-readability-redundant-member-init', # Useful but triggers another problem. Imagine a struct S with multiple String members. Structs are often instantiated via designated
+    # initializer S s{.s1 = [...], .s2 = [...], [...]}. In this case, the compiler warning `missing-field-initializers` requires specifying all members which are not in-struct
+    # initialized (example: s1 in struct S { String s1; String s2{}; }; is not in-struct initialized, therefore it must be specified at instantiation time). As explicitly
+    # specifying all members is tedious for large structs, `missing-field-initializers` makes programmers initialize as many members as possible in-struct. The clang-tidy
+    # warning `readability-redundant-member-init` demands the opposite, so the two checks are incompatible with each other.
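+    # A minimal illustration of the conflict (hypothetical code):
+    #   struct S { String s1; String s2{}; };
+    #   S s{.s1 = "x"};   // fine for `missing-field-initializers` only because s2 has an in-struct initializer,
+    #                     // which `readability-redundant-member-init` would in turn ask to remove.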
'-readability-simplify-boolean-expr',
'-readability-suspicious-call-argument',
'-readability-uppercase-literal-suffix',
@@ -125,17 +134,6 @@ Checks: [
'-zircon-*',
- # These are new in clang-18, and we have to sort them out:
- '-readability-avoid-nested-conditional-operator',
- '-modernize-use-designated-initializers',
- '-performance-enum-size',
- '-readability-redundant-inline-specifier',
- '-readability-redundant-member-init',
- '-bugprone-crtp-constructor-accessibility',
- '-bugprone-suspicious-stringview-data-usage',
- '-bugprone-multi-level-implicit-pointer-conversion',
- '-cert-err33-c',
-
# This is a good check, but clang-tidy crashes, see https://github.com/llvm/llvm-project/issues/91872
'-modernize-use-constraints',
# https://github.com/abseil/abseil-cpp/issues/1667
diff --git a/.github/actions/common_setup/action.yml b/.github/actions/common_setup/action.yml
index e492fa97816..b9299c64e72 100644
--- a/.github/actions/common_setup/action.yml
+++ b/.github/actions/common_setup/action.yml
@@ -28,3 +28,10 @@ runs:
run: |
# to remove every leftovers
sudo rm -fr "$TEMP_PATH" && mkdir -p "$TEMP_PATH"
+ - name: Tune vm.mmap_rnd_bits for sanitizers
+ shell: bash
+ run: |
+ sudo sysctl vm.mmap_rnd_bits
+ # https://github.com/google/sanitizers/issues/856
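+        # With the newer kernel default of 32 random bits, PIE mappings can collide
+        # with the sanitizers' fixed shadow-memory layout, so lower it to the
+        # known-good value of 28 (see the issue above).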
+ echo "Tune vm.mmap_rnd_bits for sanitizers"
+ sudo sysctl vm.mmap_rnd_bits=28
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index 9f16e32707e..f20e987db97 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -130,15 +130,21 @@ jobs:
with:
stage: Tests_2
data: ${{ needs.RunConfig.outputs.data }}
+  # stage for jobs that do not block the merge
+ Tests_3:
+ needs: [RunConfig, Tests_1, Tests_2]
+ if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }}
+ uses: ./.github/workflows/reusable_test_stage.yml
+ with:
+ stage: Tests_3
+ data: ${{ needs.RunConfig.outputs.data }}
################################# Reports #################################
- # Reports should by run even if Builds_1/2 fail, so put them separatly in wf (not in Tests_1/2)
+  # Reports should be run even if Builds_1/2 fail, so put them separately in wf (not in Tests_1/2)
Builds_1_Report:
# run report check for failed builds to indicate the CI error
- if: ${{ !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
- needs:
- - RunConfig
- - Builds_1
+ if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
+ needs: [RunConfig, StyleCheck, Builds_1]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
@@ -146,25 +152,39 @@ jobs:
data: ${{ needs.RunConfig.outputs.data }}
Builds_2_Report:
# run report check for failed builds to indicate the CI error
- if: ${{ !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }}
- needs:
- - RunConfig
- - Builds_2
+ if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }}
+ needs: [RunConfig, StyleCheck, Builds_2]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
+ CheckReadyForMerge:
+ if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
+ needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2]
+ runs-on: [self-hosted, style-checker-aarch64]
+ steps:
+ - name: Check out repository code
+ uses: ClickHouse/checkout@v1
+ with:
+ filter: tree:0
+ - name: Check and set merge status
+ run: |
+ cd "$GITHUB_WORKSPACE/tests/ci"
+ python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
+
################################# Stage Final #################################
#
FinishCheck:
if: ${{ !failure() && !cancelled() }}
- needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2]
+ needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
+ with:
+ filter: tree:0
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index abbc48ab23a..96ba2961d3a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -61,13 +61,16 @@ if (ENABLE_CHECK_HEAVY_BUILDS)
# set CPU time limit to 1000 seconds
set (RLIMIT_CPU 1000)
- # Sanitizers are too heavy
- if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE)
- set (RLIMIT_DATA 10000000000) # 10G
+    # Sanitizers are too heavy. So are some architectures.
+ if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE OR ARCH_RISCV64 OR ARCH_LOONGARCH64)
+        set (RLIMIT_DATA 10000000000) # 10G
+        set (RLIMIT_AS  20000000000)  # 20G, twice as large as RLIMIT_DATA
endif()
- # For some files currently building RISCV64 might be too slow. TODO: Improve compilation times per file
- if (ARCH_RISCV64)
+    # Building some files for RISCV64/LOONGARCH64 is currently too slow.
+ # TODO: Improve compilation times per file
+ if (ARCH_RISCV64 OR ARCH_LOONGARCH64)
set (RLIMIT_CPU 1800)
endif()
diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp
index bea2e99fa51..f20b9daf22e 100644
--- a/base/base/cgroupsv2.cpp
+++ b/base/base/cgroupsv2.cpp
@@ -9,11 +9,18 @@
bool cgroupsV2Enabled()
{
#if defined(OS_LINUX)
- /// This file exists iff the host has cgroups v2 enabled.
- auto controllers_file = default_cgroups_mount / "cgroup.controllers";
- if (!std::filesystem::exists(controllers_file))
- return false;
- return true;
+ try
+ {
+ /// This file exists iff the host has cgroups v2 enabled.
+ auto controllers_file = default_cgroups_mount / "cgroup.controllers";
+ if (!std::filesystem::exists(controllers_file))
+ return false;
+ return true;
+ }
+ catch (const std::filesystem::filesystem_error &) /// all "underlying OS API errors", typically: permission denied
+ {
+ return false; /// not logging the exception as most callers fall back to cgroups v1
+ }
#else
return false;
#endif
diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake
index 1eeb1a872bd..6bde75f8c9a 100644
--- a/cmake/freebsd/default_libs.cmake
+++ b/cmake/freebsd/default_libs.cmake
@@ -1,11 +1,23 @@
set (DEFAULT_LIBS "-nodefaultlibs")
if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "amd64")
- execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-x86_64.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
+ set(system_processor "x86_64")
else ()
- execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
+ set(system_processor "${CMAKE_SYSTEM_PROCESSOR}")
endif ()
+file(GLOB bprefix "/usr/local/llvm${COMPILER_VERSION_MAJOR}/lib/clang/${COMPILER_VERSION_MAJOR}/lib/${system_processor}-portbld-freebsd*/")
+message(STATUS "-Bprefix: ${bprefix}")
+
+execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins-${system_processor}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
+# --print-file-name simply prints back what you passed if nothing was resolved, so let's try one other possible option
+if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins-${system_processor}.a")
+ execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
+endif()
+if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins.a")
+    message(FATAL_ERROR "libclang_rt.builtins was not found")
+endif()
+
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread")
message(STATUS "Default libraries: ${DEFAULT_LIBS}")
diff --git a/contrib/libbcrypt-cmake/CMakeLists.txt b/contrib/libbcrypt-cmake/CMakeLists.txt
index d40d7f9195e..9e97f0af493 100644
--- a/contrib/libbcrypt-cmake/CMakeLists.txt
+++ b/contrib/libbcrypt-cmake/CMakeLists.txt
@@ -7,7 +7,7 @@ endif()
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libbcrypt")
-set(SRCS
+set(SRCS
"${LIBRARY_DIR}/bcrypt.c"
"${LIBRARY_DIR}/crypt_blowfish/crypt_blowfish.c"
"${LIBRARY_DIR}/crypt_blowfish/crypt_gensalt.c"
@@ -16,4 +16,13 @@ set(SRCS
add_library(_bcrypt ${SRCS})
target_include_directories(_bcrypt SYSTEM PUBLIC "${LIBRARY_DIR}")
+# Avoid conflicts for crypt_r on FreeBSD [1]:
+#
+# - char *crypt_r(__const char *key, __const char *setting, void *data);
+# - char *crypt_r(const char *, const char *, struct crypt_data *);
+#
+# [1]: https://github.com/freebsd/freebsd-src/commit/5f521d7ba72145092ea23ff6081d8791ad6c1f9d
+#
+# NOTE: ow-crypt.h is used only internally, so PRIVATE is enough
+target_compile_definitions(_bcrypt PRIVATE -D__SKIP_GNU)
add_library(ch_contrib::bcrypt ALIAS _bcrypt)
diff --git a/contrib/libunwind b/contrib/libunwind
index 854538ce337..d6a01c46327 160000
--- a/contrib/libunwind
+++ b/contrib/libunwind
@@ -1 +1 @@
-Subproject commit 854538ce337d631b619010528adff22cd58f9dce
+Subproject commit d6a01c46327e56fd86beb8aaa31591fcd9a6b7df
diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index bc7ffd1c2ef..4d5159cfa9e 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -160,10 +160,17 @@ function clone_submodules
git submodule sync
git submodule init
- # --jobs does not work as fast as real parallel running
- printf '%s\0' "${SUBMODULES_TO_UPDATE[@]}" | \
- xargs --max-procs=100 --null --no-run-if-empty --max-args=1 \
- git submodule update --depth 1 --single-branch
+
+ # Network is unreliable
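+    # Retry the whole update up to 10 times: break on the first success,
+    # sleep 1 second between attempts.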
+ for _ in {1..10}
+ do
+ # --jobs does not work as fast as real parallel running
+ printf '%s\0' "${SUBMODULES_TO_UPDATE[@]}" | \
+ xargs --max-procs=100 --null --no-run-if-empty --max-args=1 \
+ git submodule update --depth 1 --single-branch && break
+ sleep 1
+ done
+
git submodule foreach git reset --hard
git submodule foreach git checkout @ -f
git submodule foreach git clean -xfd
diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml
index ad261be1abe..e2a4976b385 100644
--- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml
+++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml
@@ -36,6 +36,11 @@
+
+
+
+
+
diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib
index 6aaddbfe590..3b6ad244c82 100644
--- a/docker/test/stateless/stress_tests.lib
+++ b/docker/test/stateless/stress_tests.lib
@@ -154,6 +154,11 @@ EOL
+
+
+
+
+
diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh
index 6761ddba3e5..29174cc87e6 100644
--- a/docker/test/upgrade/run.sh
+++ b/docker/test/upgrade/run.sh
@@ -58,8 +58,14 @@ echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/sys
# Install previous release packages
install_packages previous_release_package_folder
-# Save old settings from system table for settings changes check
-clickhouse-local -q "select * from system.settings format Native" > old_settings.native
+# NOTE: we need to run clickhouse-local under `script` to get settings without any adjustments, which clickhouse-local applies when stdout is not a tty
+function save_settings_clean()
+{
+ local out=$1 && shift
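+    # `script` runs the command on a pseudo-terminal, so stdout looks like a tty to
+    # clickhouse-local and the non-tty adjustments (see ClientBase::adjustSettings) are not applied.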
+ script -q -c "clickhouse-local -q \"select * from system.settings into outfile '$out'\"" --log-out /dev/null
+}
+
+save_settings_clean 'old_settings.native'
# Initial run without S3 to create system.*_log on local file system to make it
# available for dump via clickhouse-local
@@ -183,7 +189,7 @@ configure
IS_SANITIZED=$(clickhouse-local --query "SELECT value LIKE '%-fsanitize=%' FROM system.build_options WHERE name = 'CXX_FLAGS'")
if [ "${IS_SANITIZED}" -eq "0" ]
then
- clickhouse-local -q "select * from system.settings format Native" > new_settings.native
+ save_settings_clean 'new_settings.native'
clickhouse-local -nmq "
CREATE TABLE old_settings AS file('old_settings.native');
CREATE TABLE new_settings AS file('new_settings.native');
diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md
index 2ba50b39934..46c24ad8491 100644
--- a/docs/en/operations/backup.md
+++ b/docs/en/operations/backup.md
@@ -22,7 +22,7 @@ description: In order to effectively mitigate possible human errors, you should
TEMPORARY TABLE table_name [AS table_name_in_backup] |
VIEW view_name [AS view_name_in_backup]
ALL TEMPORARY TABLES [EXCEPT ...] |
- ALL DATABASES [EXCEPT ...] } [,...]
+ ALL [EXCEPT ...] } [,...]
[ON CLUSTER 'cluster_name']
TO|FROM File('/') | Disk('', '/') | S3('/', '', '')
[SETTINGS base_backup = File('/') | Disk(...) | S3('/', '', '')]
diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md
index 7005783dd60..9b316960750 100644
--- a/docs/en/operations/storing-data.md
+++ b/docs/en/operations/storing-data.md
@@ -7,27 +7,27 @@ title: "External Disks for Storing Data"
Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely. Various storages are supported:
1. [Amazon S3](https://aws.amazon.com/s3/) object storage.
-2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html))
-3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs).
+2. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs).
+3. Unsupported: The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html))
:::note ClickHouse also has support for external table engines, which are different from external storage option described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables.
1. to work with data stored on `Amazon S3` disks, use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine.
-2. to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
-3. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine.
+2. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine.
+3. Unsupported: to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
:::
## Configuring external storage {#configuring-external-storage}
-[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly.
+[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` (unsupported) using a disk with types `s3`, `azure_blob_storage`, `hdfs` (unsupported) accordingly.
Disk configuration requires:
-1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`.
+1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs` (unsupported), `local_blob_storage`, `web`.
2. Configuration of a specific external storage type.
Starting from 24.1 clickhouse version, it is possible to use a new configuration option.
It requires to specify:
1. `type` equal to `object_storage`
-2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`.
+2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs` (unsupported), `local_blob_storage` (or just `local` from `24.3`), `web`.
Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web` and, starting from `24.4`, `plain_rewritable`.
Usage of `plain` metadata type is described in [plain storage section](/docs/en/operations/storing-data.md/#storing-data-on-webserver), `web` metadata type can be used only with `web` object storage type, `local` metadata type stores metadata files locally (each metadata files contains mapping to files in object storage and some additional meta information about them).
@@ -328,7 +328,7 @@ Configuration:
```
-Starting from `24.1` it is possible configure any object storage disk (`s3`, `azure`, `hdfs`, `local`) using `plain` metadata type.
+Starting from `24.1` it is possible to configure any object storage disk (`s3`, `azure`, `hdfs` (unsupported), `local`) using `plain` metadata type.
Configuration:
``` xml
@@ -428,12 +428,14 @@ Examples of working configurations can be found in integration tests directory (
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
:::
-## Using HDFS storage {#hdfs-storage}
+## Using HDFS storage (Unsupported)
In this sample configuration:
-- the disk is of type `hdfs`
+- the disk is of type `hdfs` (unsupported)
- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
+Note that HDFS is unsupported, so there might be issues when using it. Feel free to make a pull request with a fix if any issue arises.
+
```xml
@@ -464,9 +466,11 @@ In this sample configuration:
```
+Keep in mind that HDFS may not work in corner cases.
+
### Using Data Encryption {#encrypted-virtual-file-system}
-You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one.
+You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) (unsupported) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one.
Example of disk configuration:
@@ -529,7 +533,7 @@ Example of disk configuration:
It is possible to configure local cache over disks in storage configuration starting from version 22.3.
For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc.
-For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS.
+For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS (unsupported).
Cache uses `LRU` cache policy.
@@ -971,7 +975,7 @@ Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#htt
### Zero-copy Replication (not ready for production) {#zero-copy}
-Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself.
+Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` (unsupported) disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself.
:::note Zero-copy replication is not ready for production
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md
index d48eb31df00..75b855966a3 100644
--- a/docs/en/operations/system-tables/query_log.md
+++ b/docs/en/operations/system-tables/query_log.md
@@ -108,7 +108,7 @@ Columns:
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution.
- `used_data_type_families` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `data type families`, which were used during query execution.
-- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries`, which were used during query execution.
+- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries`, which were used during query execution. For dictionaries configured using an XML file this is the name of the dictionary, and for dictionaries created by an SQL statement, the canonical name is the fully qualified object name.
- `used_formats` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `formats`, which were used during query execution.
- `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution.
- `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md
new file mode 100644
index 00000000000..d9b44b3ff07
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md
@@ -0,0 +1,45 @@
+---
+slug: /en/sql-reference/aggregate-functions/reference/analysis_of_variance
+sidebar_position: 6
+---
+
+# analysisOfVariance
+
+Provides a statistical test for one-way analysis of variance (ANOVA test). It is a test over several groups of normally distributed observations to find out whether all groups have the same mean or not.
+
+**Syntax**
+
+```sql
+analysisOfVariance(val, group_no)
+```
+
+Alias: `anova`
+
+**Parameters**
+- `val`: value.
+- `group_no`: group number that `val` belongs to.
+
+:::note
+Groups are enumerated starting from 0, and at least two groups are needed to perform the test.
+At least one group must contain more than one observation.
+:::
+
+**Returned value**
+
+- `(f_statistic, p_value)`. [Tuple](../../data-types/tuple.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)).
+
+**Example**
+
+Query:
+
+```sql
+SELECT analysisOfVariance(number, number % 2) FROM numbers(1048575);
+```
+
+Result:
+
+```response
+┌─analysisOfVariance(number, modulo(number, 2))─┐
+│ (0,1) │
+└───────────────────────────────────────────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md
index e9a7fe4fc2b..451ee2aae9d 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/index.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/index.md
@@ -37,6 +37,7 @@ Standard aggregate functions:
ClickHouse-specific aggregate functions:
+- [analysisOfVariance](/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md)
- [any](/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md)
- [anyHeavy](/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md)
- [anyLast](/docs/en/sql-reference/aggregate-functions/reference/anylast.md)
diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 11ee471d709..12b565d5358 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -998,17 +998,170 @@ SELECT version()
Returns the build ID generated by a compiler for the running ClickHouse server binary.
If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value.
-## blockNumber()
+## blockNumber
-Returns the sequence number of the data block where the row is located.
+Returns a monotonically increasing sequence number of the [block](../../development/architecture.md#block) containing the row.
+The returned block number is updated on a best-effort basis, i.e. it may not be fully accurate.
-## rowNumberInBlock() {#rowNumberInBlock}
+**Syntax**
+
+```sql
+blockNumber()
+```
+
+**Returned value**
+
+- Sequence number of the data block where the row is located. [UInt64](../data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT blockNumber()
+FROM
+(
+ SELECT *
+ FROM system.numbers
+ LIMIT 10
+) SETTINGS max_block_size = 2
+```
+
+Result:
+
+```response
+┌─blockNumber()─┐
+│ 7 │
+│ 7 │
+└───────────────┘
+┌─blockNumber()─┐
+│ 8 │
+│ 8 │
+└───────────────┘
+┌─blockNumber()─┐
+│ 9 │
+│ 9 │
+└───────────────┘
+┌─blockNumber()─┐
+│ 10 │
+│ 10 │
+└───────────────┘
+┌─blockNumber()─┐
+│ 11 │
+│ 11 │
+└───────────────┘
+```
+
+## rowNumberInBlock {#rowNumberInBlock}
+
+Returns the number of the current row within each [block](../../development/architecture.md#block) processed by `rowNumberInBlock`.
+The returned number starts at 0 for each block.
+
+**Syntax**
+
+```sql
+rowNumberInBlock()
+```
+
+**Returned value**
+
+- Ordinal number of the row in the data block starting from 0. [UInt64](../data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT rowNumberInBlock()
+FROM
+(
+ SELECT *
+ FROM system.numbers_mt
+ LIMIT 10
+) SETTINGS max_block_size = 2
+```
+
+Result:
+
+```response
+┌─rowNumberInBlock()─┐
+│ 0 │
+│ 1 │
+└────────────────────┘
+┌─rowNumberInBlock()─┐
+│ 0 │
+│ 1 │
+└────────────────────┘
+┌─rowNumberInBlock()─┐
+│ 0 │
+│ 1 │
+└────────────────────┘
+┌─rowNumberInBlock()─┐
+│ 0 │
+│ 1 │
+└────────────────────┘
+┌─rowNumberInBlock()─┐
+│ 0 │
+│ 1 │
+└────────────────────┘
+```
+
+## rowNumberInAllBlocks
+
+Returns a unique row number for each row processed by `rowNumberInAllBlocks`. The returned numbers start at 0.
+
+**Syntax**
+
+```sql
+rowNumberInAllBlocks()
+```
+
+**Returned value**
+
+- Unique row number for each processed row, starting from 0. [UInt64](../data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT rowNumberInAllBlocks()
+FROM
+(
+ SELECT *
+ FROM system.numbers_mt
+ LIMIT 10
+)
+SETTINGS max_block_size = 2
+```
+
+Result:
+
+```response
+┌─rowNumberInAllBlocks()─┐
+│ 0 │
+│ 1 │
+└────────────────────────┘
+┌─rowNumberInAllBlocks()─┐
+│ 4 │
+│ 5 │
+└────────────────────────┘
+┌─rowNumberInAllBlocks()─┐
+│ 2 │
+│ 3 │
+└────────────────────────┘
+┌─rowNumberInAllBlocks()─┐
+│ 6 │
+│ 7 │
+└────────────────────────┘
+┌─rowNumberInAllBlocks()─┐
+│ 8 │
+│ 9 │
+└────────────────────────┘
+```
-Returns the ordinal number of the row in the data block. Different data blocks are always recalculated.
-## rowNumberInAllBlocks()
-Returns the ordinal number of the row in the data block. This function only considers the affected data blocks.
## neighbor
diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt
index 0d91de2dad8..4640882f2be 100644
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@@ -162,7 +162,7 @@ if (ARCH_AMD64 AND OS_LINUX AND NOT OS_ANDROID)
set (HARMFUL_LIB harmful)
endif ()
-target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils ${HARMFUL_LIB})
+target_link_libraries (clickhouse PRIVATE clickhouse_common_io ${HARMFUL_LIB})
target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
if (ENABLE_CLICKHOUSE_KEEPER)
diff --git a/programs/client/CMakeLists.txt b/programs/client/CMakeLists.txt
index e160355ef7b..f8ef8ccaf65 100644
--- a/programs/client/CMakeLists.txt
+++ b/programs/client/CMakeLists.txt
@@ -10,7 +10,6 @@ set (CLICKHOUSE_CLIENT_LINK
clickhouse_common_io
clickhouse_functions
clickhouse_parsers
- string_utils
)
if (TARGET ch_rust::skim)
diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp
index d4b975ce1e8..1b91e7ceaf3 100644
--- a/programs/format/Format.cpp
+++ b/programs/format/Format.cpp
@@ -15,7 +15,7 @@
#include
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp
index eaf85df67b1..5430c4b0a42 100644
--- a/programs/git-import/git-import.cpp
+++ b/programs/git-import/git-import.cpp
@@ -14,7 +14,7 @@
#include
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
#include
diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt
index b811868333b..af360e44ff4 100644
--- a/programs/keeper/CMakeLists.txt
+++ b/programs/keeper/CMakeLists.txt
@@ -148,6 +148,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/IOUringReader.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getIOUringReader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferWithFinalizeCallback.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousBoundedReadBuffer.cpp
diff --git a/programs/keeper/clickhouse-keeper.cpp b/programs/keeper/clickhouse-keeper.cpp
index be2686d936b..f2f91930ac0 100644
--- a/programs/keeper/clickhouse-keeper.cpp
+++ b/programs/keeper/clickhouse-keeper.cpp
@@ -1,4 +1,4 @@
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include "config_tools.h"
diff --git a/programs/library-bridge/ExternalDictionaryLibraryUtils.h b/programs/library-bridge/ExternalDictionaryLibraryUtils.h
index e6bf8f2a4c3..2eb44022742 100644
--- a/programs/library-bridge/ExternalDictionaryLibraryUtils.h
+++ b/programs/library-bridge/ExternalDictionaryLibraryUtils.h
@@ -1,6 +1,6 @@
#pragma once
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
diff --git a/programs/main.cpp b/programs/main.cpp
index 4bb73399719..bc8476e4ce4 100644
--- a/programs/main.cpp
+++ b/programs/main.cpp
@@ -15,7 +15,7 @@
#include "config_tools.h"
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp
index 8035f053b41..688ae1a1143 100644
--- a/programs/obfuscator/Obfuscator.cpp
+++ b/programs/obfuscator/Obfuscator.cpp
@@ -19,7 +19,7 @@
#include
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
#include
diff --git a/programs/odbc-bridge/validateODBCConnectionString.cpp b/programs/odbc-bridge/validateODBCConnectionString.cpp
index 6c6e11162b4..72c3c9bddca 100644
--- a/programs/odbc-bridge/validateODBCConnectionString.cpp
+++ b/programs/odbc-bridge/validateODBCConnectionString.cpp
@@ -4,7 +4,7 @@
#include
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include "validateODBCConnectionString.h"
diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt
index 81440b03690..76d201cc924 100644
--- a/programs/server/CMakeLists.txt
+++ b/programs/server/CMakeLists.txt
@@ -13,7 +13,6 @@ set (CLICKHOUSE_SERVER_LINK
clickhouse_parsers
clickhouse_storages_system
clickhouse_table_functions
- string_utils
${LINK_RESOURCE_LIB}
diff --git a/src/Access/User.cpp b/src/Access/User.cpp
index ef5cf722113..6a296706baf 100644
--- a/src/Access/User.cpp
+++ b/src/Access/User.cpp
@@ -1,5 +1,5 @@
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp
index 908ff780c62..1f9a977bab6 100644
--- a/src/Access/UsersConfigAccessStorage.cpp
+++ b/src/Access/UsersConfigAccessStorage.cpp
@@ -10,7 +10,7 @@
#include
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
#include
diff --git a/src/AggregateFunctions/AggregateFunctionsArgMinArgMax.cpp b/src/AggregateFunctions/AggregateFunctionsArgMinArgMax.cpp
index e8f40120152..9608ca26f37 100644
--- a/src/AggregateFunctions/AggregateFunctionsArgMinArgMax.cpp
+++ b/src/AggregateFunctions/AggregateFunctionsArgMinArgMax.cpp
@@ -14,7 +14,7 @@ struct Settings;
namespace ErrorCodes
{
-extern const int CORRUPTED_DATA;
+extern const int INCORRECT_DATA;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int LOGICAL_ERROR;
}
@@ -198,7 +198,7 @@ public:
this->data(place).value().read(buf, *serialization_val, arena);
if (unlikely(this->data(place).value().has() != this->data(place).result().has()))
throw Exception(
- ErrorCodes::CORRUPTED_DATA,
+ ErrorCodes::INCORRECT_DATA,
"Invalid state of the aggregate function {}: has_value ({}) != has_result ({})",
getName(),
this->data(place).value().has(),
diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.cpp b/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.cpp
index a42e4177ac5..428f7168826 100644
--- a/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.cpp
+++ b/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.cpp
@@ -1,6 +1,6 @@
#include "AggregateFunctionCombinatorFactory.h"
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
namespace DB
{
diff --git a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp
index 9bd044dd89c..70aa1a41548 100644
--- a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp
+++ b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp
@@ -42,7 +42,7 @@ private:
return;
const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage();
- bool is_final_supported = storage && storage->supportsFinal();
+ bool is_final_supported = storage && !storage->isRemote() && storage->supportsFinal();
if (!is_final_supported)
return;
diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp
index 51f1fb6cc2f..f7919b6422c 100644
--- a/src/Analyzer/QueryTreePassManager.cpp
+++ b/src/Analyzer/QueryTreePassManager.cpp
@@ -192,7 +192,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node)
void QueryTreePassManager::runOnlyResolve(QueryTreeNodePtr query_tree_node)
{
// Run only QueryAnalysisPass and GroupingFunctionsResolvePass passes.
- run(query_tree_node, 2);
+ run(query_tree_node, 3);
}
void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node, size_t up_to_pass_index)
@@ -249,6 +249,7 @@ void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze)
{
    manager.addPass(std::make_unique<QueryAnalysisPass>(only_analyze));
    manager.addPass(std::make_unique<GroupingFunctionsResolvePass>());
+    manager.addPass(std::make_unique<AutoFinalOnQueryPass>());
manager.addPass(std::make_unique());
manager.addPass(std::make_unique());
@@ -294,7 +295,6 @@ void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze)
manager.addPass(std::make_unique());
-    manager.addPass(std::make_unique<AutoFinalOnQueryPass>());
manager.addPass(std::make_unique());
manager.addPass(std::make_unique());
diff --git a/src/Backups/BackupFactory.h b/src/Backups/BackupFactory.h
index 4e752508577..e13a9a12ca2 100644
--- a/src/Backups/BackupFactory.h
+++ b/src/Backups/BackupFactory.h
@@ -39,6 +39,7 @@ public:
    std::optional<UUID> backup_uuid;
bool deduplicate_files = true;
bool allow_s3_native_copy = true;
+ bool allow_azure_native_copy = true;
bool use_same_s3_credentials_for_base_backup = false;
bool azure_attempt_to_create_container = true;
ReadSettings read_settings;
diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp
index a3998431674..331cace67d7 100644
--- a/src/Backups/BackupIO_AzureBlobStorage.cpp
+++ b/src/Backups/BackupIO_AzureBlobStorage.cpp
@@ -31,22 +31,28 @@ namespace ErrorCodes
BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
+ bool allow_azure_native_copy,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_)
: BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage"))
- , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
+ , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false}
, configuration(configuration_)
{
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true});
-    object_storage = std::make_unique<AzureObjectStorage>("BackupReaderAzureBlobStorage",
- std::move(client_ptr),
- StorageAzureBlob::createSettings(context_),
- configuration_.container);
+    object_storage = std::make_unique<AzureObjectStorage>(
+ "BackupReaderAzureBlobStorage",
+ std::move(client_ptr),
+ StorageAzureBlob::createSettings(context_),
+ configuration.container,
+ configuration.getConnectionURL().toString());
+
client = object_storage->getAzureBlobStorageClient();
- settings = object_storage->getSettings();
+ auto settings_copy = *object_storage->getSettings();
+ settings_copy.use_native_copy = allow_azure_native_copy;
+    settings = std::make_unique<const AzureObjectStorageSettings>(settings_copy);
}
BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default;
@@ -76,9 +82,9 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode)
{
auto destination_data_source_description = destination_disk->getDataSourceDescription();
- if ((destination_data_source_description.type == DataSourceType::ObjectStorage)
- && (destination_data_source_description.object_storage_type == ObjectStorageType::Azure)
- && (destination_data_source_description.is_encrypted == encrypted_in_backup))
+    LOG_TRACE(log, "Source description {}, destination description {}", data_source_description.description, destination_data_source_description.description);
+ if (destination_data_source_description.sameKind(data_source_description)
+ && destination_data_source_description.is_encrypted == encrypted_in_backup)
{
LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName());
        auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional<ObjectAttributes> &) -> size_t
@@ -116,12 +122,13 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup,
BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
+ bool allow_azure_native_copy,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_,
bool attempt_to_create_container)
: BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage"))
- , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
+ , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false}
, configuration(configuration_)
{
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container);
@@ -130,9 +137,12 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
    object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage",
std::move(client_ptr),
StorageAzureBlob::createSettings(context_),
- configuration_.container);
+ configuration_.container,
+ configuration_.getConnectionURL().toString());
client = object_storage->getAzureBlobStorageClient();
- settings = object_storage->getSettings();
+ auto settings_copy = *object_storage->getSettings();
+ settings_copy.use_native_copy = allow_azure_native_copy;
+    settings = std::make_unique<const AzureObjectStorageSettings>(settings_copy);
}
void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
@@ -140,7 +150,9 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu
{
/// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible.
auto source_data_source_description = src_disk->getDataSourceDescription();
- if (source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted))
+    LOG_TRACE(log, "Source description {}, destination description {}", source_data_source_description.description, data_source_description.description);
+ if (source_data_source_description.sameKind(data_source_description)
+ && source_data_source_description.is_encrypted == copy_encrypted)
{
/// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage container.
/// In this case we can't use the native copy.
diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h
index f0b9aace4d4..3a909ab684a 100644
--- a/src/Backups/BackupIO_AzureBlobStorage.h
+++ b/src/Backups/BackupIO_AzureBlobStorage.h
@@ -16,7 +16,12 @@ namespace DB
class BackupReaderAzureBlobStorage : public BackupReaderDefault
{
public:
- BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
+ BackupReaderAzureBlobStorage(
+ StorageAzureBlob::Configuration configuration_,
+ bool allow_azure_native_copy,
+ const ReadSettings & read_settings_,
+ const WriteSettings & write_settings_,
+ const ContextPtr & context_);
~BackupReaderAzureBlobStorage() override;
bool fileExists(const String & file_name) override;
@@ -37,7 +42,13 @@ private:
class BackupWriterAzureBlobStorage : public BackupWriterDefault
{
public:
- BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container);
+ BackupWriterAzureBlobStorage(
+ StorageAzureBlob::Configuration configuration_,
+ bool allow_azure_native_copy,
+ const ReadSettings & read_settings_,
+ const WriteSettings & write_settings_,
+ const ContextPtr & context_,
+ bool attempt_to_create_container);
~BackupWriterAzureBlobStorage() override;
bool fileExists(const String & file_name) override;
diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp
index 8c0989b8202..8f32c918c61 100644
--- a/src/Backups/BackupImpl.cpp
+++ b/src/Backups/BackupImpl.cpp
@@ -4,7 +4,7 @@
#include
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
#include
diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp
index 06f49dfa448..e33880f88e3 100644
--- a/src/Backups/BackupSettings.cpp
+++ b/src/Backups/BackupSettings.cpp
@@ -27,6 +27,7 @@ namespace ErrorCodes
M(Bool, decrypt_files_from_encrypted_disks) \
M(Bool, deduplicate_files) \
M(Bool, allow_s3_native_copy) \
+ M(Bool, allow_azure_native_copy) \
M(Bool, use_same_s3_credentials_for_base_backup) \
M(Bool, azure_attempt_to_create_container) \
M(Bool, read_from_filesystem_cache) \
diff --git a/src/Backups/BackupSettings.h b/src/Backups/BackupSettings.h
index eccf4e90ce7..a6c4d5d7181 100644
--- a/src/Backups/BackupSettings.h
+++ b/src/Backups/BackupSettings.h
@@ -44,6 +44,9 @@ struct BackupSettings
/// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs)
bool allow_s3_native_copy = true;
+ /// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs)
+ bool allow_azure_native_copy = true;
+
/// Whether base backup to S3 should inherit credentials from the BACKUP query.
bool use_same_s3_credentials_for_base_backup = false;
diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp
index 9057dc9d198..69d9c52ebd9 100644
--- a/src/Backups/BackupsWorker.cpp
+++ b/src/Backups/BackupsWorker.cpp
@@ -598,6 +598,7 @@ void BackupsWorker::doBackup(
backup_create_params.backup_uuid = backup_settings.backup_uuid;
backup_create_params.deduplicate_files = backup_settings.deduplicate_files;
backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy;
+ backup_create_params.allow_azure_native_copy = backup_settings.allow_azure_native_copy;
backup_create_params.use_same_s3_credentials_for_base_backup = backup_settings.use_same_s3_credentials_for_base_backup;
backup_create_params.azure_attempt_to_create_container = backup_settings.azure_attempt_to_create_container;
backup_create_params.read_settings = getReadSettingsForBackup(context, backup_settings);
diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp
index 1b9545fc455..8b05965f472 100644
--- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp
+++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp
@@ -135,10 +135,12 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
if (params.open_mode == IBackup::OpenMode::READ)
{
-        auto reader = std::make_shared<BackupReaderAzureBlobStorage>(configuration,
- params.read_settings,
- params.write_settings,
- params.context);
+        auto reader = std::make_shared<BackupReaderAzureBlobStorage>(
+ configuration,
+ params.allow_azure_native_copy,
+ params.read_settings,
+ params.write_settings,
+ params.context);
return std::make_unique(
params.backup_info,
@@ -150,11 +152,13 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
}
else
{
-        auto writer = std::make_shared<BackupWriterAzureBlobStorage>(configuration,
- params.read_settings,
- params.write_settings,
- params.context,
- params.azure_attempt_to_create_container);
+        auto writer = std::make_shared<BackupWriterAzureBlobStorage>(
+ configuration,
+ params.allow_azure_native_copy,
+ params.read_settings,
+ params.write_settings,
+ params.context,
+ params.azure_attempt_to_create_container);
return std::make_unique(
params.backup_info,
diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp
index 6da2b7c06da..c25d7bd2fed 100644
--- a/src/Bridge/IBridge.cpp
+++ b/src/Bridge/IBridge.cpp
@@ -6,7 +6,7 @@
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
#include
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index da17bc1f41f..4e8946facda 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -307,7 +307,6 @@ endif()
target_link_libraries (clickhouse_common_io
PRIVATE
- string_utils
widechar_width
${LINK_LIBRARIES_ONLY_ON_X86_64}
PUBLIC
@@ -320,7 +319,6 @@ target_link_libraries (clickhouse_common_io
target_link_libraries (clickhouse_compression
PUBLIC
- string_utils
pcg_random
clickhouse_parsers
PRIVATE
@@ -410,7 +408,6 @@ dbms_target_link_libraries (
clickhouse_parsers
ch_contrib::lz4
Poco::JSON
- string_utils
PUBLIC
boost::system
clickhouse_common_io
@@ -645,7 +642,6 @@ if (ENABLE_TESTS)
dbms
clickhouse_common_config
clickhouse_common_zookeeper
- string_utils
hilite_comparator)
if (TARGET ch_contrib::simdjson)
diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index 67aba2256e8..b6f821794f1 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -18,7 +18,7 @@
#include
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
#include
@@ -710,8 +710,8 @@ void ClientBase::adjustSettings()
settings.input_format_values_allow_data_after_semicolon.changed = false;
}
- /// Do not limit pretty format output in case of --pager specified.
- if (!pager.empty())
+    /// Do not limit pretty format output if --pager is specified or stdout is not a tty.
+ if (!pager.empty() || !stdout_is_a_tty)
{
if (!global_context->getSettingsRef().output_format_pretty_max_rows.changed)
{
diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp
index 6e626c22527..19cd8cc4ee5 100644
--- a/src/Client/Connection.cpp
+++ b/src/Client/Connection.cpp
@@ -20,7 +20,7 @@
#include
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
#include
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index fa5fdfb8c21..30e62548ad6 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -22,6 +22,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_COLUMN;
extern const int NOT_IMPLEMENTED;
+ extern const int BAD_ARGUMENTS;
}
@@ -116,6 +117,38 @@ void ColumnNullable::get(size_t n, Field & res) const
getNestedColumn().get(n, res);
}
+Float64 ColumnNullable::getFloat64(size_t n) const
+{
+ if (isNullAt(n))
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of {} at {} is NULL while calling method getFloat64", getName(), n);
+ else
+ return getNestedColumn().getFloat64(n);
+}
+
+Float32 ColumnNullable::getFloat32(size_t n) const
+{
+ if (isNullAt(n))
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of {} at {} is NULL while calling method getFloat32", getName(), n);
+ else
+ return getNestedColumn().getFloat32(n);
+}
+
+UInt64 ColumnNullable::getUInt(size_t n) const
+{
+ if (isNullAt(n))
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of {} at {} is NULL while calling method getUInt", getName(), n);
+ else
+ return getNestedColumn().getUInt(n);
+}
+
+Int64 ColumnNullable::getInt(size_t n) const
+{
+ if (isNullAt(n))
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of {} at {} is NULL while calling method getInt", getName(), n);
+ else
+ return getNestedColumn().getInt(n);
+}
+
void ColumnNullable::insertData(const char * pos, size_t length)
{
if (pos == nullptr)
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index ef4bf4fa41b..c7ebb6ed7b6 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -57,6 +57,10 @@ public:
void get(size_t n, Field & res) const override;
bool getBool(size_t n) const override { return isNullAt(n) ? false : nested_column->getBool(n); }
UInt64 get64(size_t n) const override { return nested_column->get64(n); }
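+    /// Unlike getBool() and get64() above, the following getters have no meaningful
+    /// result for NULL values and throw BAD_ARGUMENTS when isNullAt(n) is true.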
+ Float64 getFloat64(size_t n) const override;
+ Float32 getFloat32(size_t n) const override;
+ UInt64 getUInt(size_t n) const override;
+ Int64 getInt(size_t n) const override;
bool isDefaultAt(size_t n) const override { return isNullAt(n); }
StringRef getDataAt(size_t) const override;
/// Will insert null value if pos=nullptr
diff --git a/src/Common/CMakeLists.txt b/src/Common/CMakeLists.txt
index b83c8431f0a..d4802c28f53 100644
--- a/src/Common/CMakeLists.txt
+++ b/src/Common/CMakeLists.txt
@@ -1,5 +1,3 @@
-add_subdirectory(StringUtils)
-
if (ENABLE_BENCHMARKS)
add_subdirectory(benchmarks)
endif()
diff --git a/src/Common/Config/CMakeLists.txt b/src/Common/Config/CMakeLists.txt
index 009e2456322..09095ef5acc 100644
--- a/src/Common/Config/CMakeLists.txt
+++ b/src/Common/Config/CMakeLists.txt
@@ -13,8 +13,6 @@ target_link_libraries(clickhouse_common_config
clickhouse_common_zookeeper
common
Poco::XML
- PRIVATE
- string_utils
)
add_library(clickhouse_common_config_no_zookeeper_log ${SRCS})
@@ -23,8 +21,6 @@ target_link_libraries(clickhouse_common_config_no_zookeeper_log
clickhouse_common_zookeeper_no_log
common
Poco::XML
- PRIVATE
- string_utils
)
if (TARGET ch_contrib::yaml_cpp)
diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp
index 7930ef20153..c9832e8efd5 100644
--- a/src/Common/Config/ConfigProcessor.cpp
+++ b/src/Common/Config/ConfigProcessor.cpp
@@ -18,7 +18,7 @@
#include
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
#include
diff --git a/src/Common/CopyableAtomic.h b/src/Common/CopyableAtomic.h
new file mode 100644
index 00000000000..227fffe927f
--- /dev/null
+++ b/src/Common/CopyableAtomic.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <atomic>
+#include <utility>
+
+namespace DB
+{
+
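+/// Wrapper that makes std::atomic copyable: copy construction and copy assignment
+/// load the source value and store it into this atomic. Note that the copy itself
+/// is not an atomic operation.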
+template <typename T>
+struct CopyableAtomic
+{
+ CopyableAtomic(const CopyableAtomic & other)
+ : value(other.value.load())
+ {}
+
+ explicit CopyableAtomic(T && value_)
+        : value(std::forward<T>(value_))
+ {}
+
+ CopyableAtomic & operator=(const CopyableAtomic & other)
+ {
+ value = other.value.load();
+ return *this;
+ }
+
+ CopyableAtomic & operator=(bool value_)
+ {
+ value = value_;
+ return *this;
+ }
+
+ explicit operator T() const { return value; }
+
+ const T & getValue() const { return value; }
+
+    std::atomic<T> value;
+};
+
+}
diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp
index b9916130bb9..21b4d114d79 100644
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@@ -288,8 +288,10 @@
M(HTTPConnectionsTotal, "Total count of all sessions: stored in the pool and actively used right now for http hosts") \
\
M(AddressesActive, "Total count of addresses which are used for creation connections with connection pools") \
- M(AddressesBanned, "Total count of addresses which are banned as faulty for creation connections with connection pools") \
-
+ M(AddressesBanned, "Total count of addresses which are banned as faulty for creation connections with connection pools") \
+ \
+ M(FilteringMarksWithPrimaryKey, "Number of threads currently doing filtering of mark ranges by the primary key") \
+ M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \
#ifdef APPLY_FOR_EXTERNAL_METRICS
#define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M)
diff --git a/src/Common/FrequencyHolder.h b/src/Common/FrequencyHolder.h
index 64207dc5423..d6c32c225bf 100644
--- a/src/Common/FrequencyHolder.h
+++ b/src/Common/FrequencyHolder.h
@@ -12,7 +12,7 @@
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
#include
diff --git a/src/Common/HTTPHeaderFilter.cpp b/src/Common/HTTPHeaderFilter.cpp
index 9ad8dd6fccf..fd02fe1ecef 100644
--- a/src/Common/HTTPHeaderFilter.cpp
+++ b/src/Common/HTTPHeaderFilter.cpp
@@ -1,5 +1,5 @@
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index d3525010419..8c8e2163aad 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -360,6 +360,7 @@ The server successfully detected this situation and will download merged part fr
M(QueryProfilerSignalOverruns, "Number of times we drop processing of a query profiler signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \
M(QueryProfilerConcurrencyOverruns, "Number of times we drop processing of a query profiler signal due to too many concurrent query profilers in other threads, which may indicate overload.") \
M(QueryProfilerRuns, "Number of times QueryProfiler had been run.") \
+ M(QueryProfilerErrors, "Invalid memory accesses during asynchronous stack unwinding.") \
\
M(CreatedLogEntryForMerge, "Successfully created log entry to merge parts in ReplicatedMergeTree.") \
M(NotCreatedLogEntryForMerge, "Log entry to merge parts in ReplicatedMergeTree is not created due to concurrent log update by another replica.") \
diff --git a/src/Common/ProxyConfigurationResolverProvider.cpp b/src/Common/ProxyConfigurationResolverProvider.cpp
index d15b4d98615..1a6dc1090ee 100644
--- a/src/Common/ProxyConfigurationResolverProvider.cpp
+++ b/src/Common/ProxyConfigurationResolverProvider.cpp
@@ -4,7 +4,7 @@
#include
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
namespace DB
diff --git a/src/Common/ProxyListConfigurationResolver.cpp b/src/Common/ProxyListConfigurationResolver.cpp
index c9b8923929a..c527c89ea6b 100644
--- a/src/Common/ProxyListConfigurationResolver.cpp
+++ b/src/Common/ProxyListConfigurationResolver.cpp
@@ -1,6 +1,6 @@
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp
index 4f72b4aba75..c3affbdd968 100644
--- a/src/Common/QueryProfiler.cpp
+++ b/src/Common/QueryProfiler.cpp
@@ -12,7 +12,6 @@
#include
#include
-#include
namespace CurrentMetrics
{
@@ -25,6 +24,7 @@ namespace ProfileEvents
extern const Event QueryProfilerSignalOverruns;
extern const Event QueryProfilerConcurrencyOverruns;
extern const Event QueryProfilerRuns;
+ extern const Event QueryProfilerErrors;
}
namespace DB
@@ -84,11 +84,29 @@ namespace
#endif
    const auto signal_context = *reinterpret_cast<ucontext_t *>(context);
- const StackTrace stack_trace(signal_context);
+    std::optional<StackTrace> stack_trace;
+
+#if defined(SANITIZER)
+ constexpr bool sanitizer = true;
+#else
+ constexpr bool sanitizer = false;
+#endif
+
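+    /// Stack unwinding from a signal handler may touch unmapped memory. Arm a
+    /// sigsetjmp checkpoint so that, if unwinding faults, the fault handler can
+    /// siglongjmp back here and we account a QueryProfilerErrors event instead of
+    /// crashing. Sanitizers do not support this trick, hence the constant above.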
+ asynchronous_stack_unwinding = true;
+ if (sanitizer || 0 == sigsetjmp(asynchronous_stack_unwinding_signal_jump_buffer, 1))
+ {
+ stack_trace.emplace(signal_context);
+ }
+ else
+ {
+ ProfileEvents::incrementNoTrace(ProfileEvents::QueryProfilerErrors);
+ }
+ asynchronous_stack_unwinding = false;
+
+ if (stack_trace)
+ TraceSender::send(trace_type, *stack_trace, {});
- TraceSender::send(trace_type, stack_trace, {});
ProfileEvents::incrementNoTrace(ProfileEvents::QueryProfilerRuns);
-
errno = saved_errno;
}
diff --git a/src/Common/RemoteHostFilter.cpp b/src/Common/RemoteHostFilter.cpp
index 815be8902e6..fe7bf878596 100644
--- a/src/Common/RemoteHostFilter.cpp
+++ b/src/Common/RemoteHostFilter.cpp
@@ -1,7 +1,7 @@
#include
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include
#include
#include
diff --git a/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp b/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp
index b568b9245ba..01aa7df48d3 100644
--- a/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp
+++ b/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp
@@ -5,7 +5,7 @@
#include
#include
-#include <Common/StringUtils/StringUtils.h>
+#include <Common/StringUtils.h>
#include