diff --git a/.clang-tidy b/.clang-tidy index e2f318562ec..219ac263ab3 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -22,6 +22,7 @@ Checks: [ '-bugprone-exception-escape', '-bugprone-forward-declaration-namespace', '-bugprone-implicit-widening-of-multiplication-result', + '-bugprone-multi-level-implicit-pointer-conversion', '-bugprone-narrowing-conversions', '-bugprone-not-null-terminated-result', '-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged @@ -98,6 +99,7 @@ Checks: [ '-modernize-use-nodiscard', '-modernize-use-trailing-return-type', + '-performance-enum-size', '-performance-inefficient-string-concatenation', '-performance-no-int-to-ptr', '-performance-avoid-endl', @@ -105,6 +107,7 @@ Checks: [ '-portability-simd-intrinsics', + '-readability-avoid-nested-conditional-operator', '-readability-avoid-unconditional-preprocessor-if', '-readability-braces-around-statements', '-readability-convert-member-functions-to-static', @@ -118,6 +121,12 @@ Checks: [ '-readability-magic-numbers', '-readability-named-parameter', '-readability-redundant-declaration', + '-readability-redundant-inline-specifier', + '-readability-redundant-member-init', # Useful but triggers another problem. Imagine a struct S with multiple String members. Structs are often instantiated via designated + # initializer S s{.s1 = [...], .s2 = [...], [...]}. In this case, compiler warning `missing-field-initializers` requires specifying all members which are not in-struct + # initialized (example: s1 in struct S { String s1; String s2{};}; is not in-struct initialized, therefore it must be specified at instantiation time). As explicitly + # specifying all members is tedious for large structs, `missing-field-initializers` makes programmers initialize as many members as possible in-struct. Clang-tidy + # warning `readability-redundant-member-init` does the opposite thing; the two warnings are not compatible with each other.
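+ # Concretely: the `{}` in `String s2{};` gets flagged as redundant by clang-tidy, yet removing it makes `missing-field-initializers` fire at every + # designated-initializer call site that omits s2.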
'-readability-simplify-boolean-expr', '-readability-suspicious-call-argument', '-readability-uppercase-literal-suffix', @@ -125,17 +134,6 @@ Checks: [ '-zircon-*', - # These are new in clang-18, and we have to sort them out: - '-readability-avoid-nested-conditional-operator', - '-modernize-use-designated-initializers', - '-performance-enum-size', - '-readability-redundant-inline-specifier', - '-readability-redundant-member-init', - '-bugprone-crtp-constructor-accessibility', - '-bugprone-suspicious-stringview-data-usage', - '-bugprone-multi-level-implicit-pointer-conversion', - '-cert-err33-c', - # This is a good check, but clang-tidy crashes, see https://github.com/llvm/llvm-project/issues/91872 '-modernize-use-constraints', # https://github.com/abseil/abseil-cpp/issues/1667 diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 9f16e32707e..f20e987db97 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -130,15 +130,21 @@ jobs: with: stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} + # stage for jobs that do not prohibit merge + Tests_3: + needs: [RunConfig, Tests_1, Tests_2] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} + uses: ./.github/workflows/reusable_test_stage.yml + with: + stage: Tests_3 + data: ${{ needs.RunConfig.outputs.data }} ################################# Reports ################################# - # Reports should by run even if Builds_1/2 fail, so put them separatly in wf (not in Tests_1/2) + # Reports should be run even if Builds_1/2 fail, so put them separately in the workflow (not in Tests_1/2) Builds_1_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} - needs: - - RunConfig - - Builds_1 + if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} + needs: [RunConfig, StyleCheck, Builds_1] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse build check @@ -146,25 +152,39 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} Builds_2_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} - needs: - - RunConfig - - Builds_2 + if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} + needs: [RunConfig, StyleCheck, Builds_2] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse special build check runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} + CheckReadyForMerge: + if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + filter: tree:0 + - name: Check and set merge status + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + ################################# Stage Final
################################# # FinishCheck: if: ${{ !failure() && !cancelled() }} - needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2] + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 + with: + filter: tree:0 - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/CMakeLists.txt b/CMakeLists.txt index abbc48ab23a..2d51c1b242f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,9 +61,11 @@ if (ENABLE_CHECK_HEAVY_BUILDS) # set CPU time limit to 1000 seconds set (RLIMIT_CPU 1000) - # Sanitizers are too heavy - if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE) - set (RLIMIT_DATA 10000000000) # 10G + # Sanitizers are too heavy. Some architectures too. + if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE OR ARCH_RISCV64 OR ARCH_LOONGARCH64) + # Twice as large + set (RLIMIT_DATA 10000000000) + set (RLIMIT_AS 20000000000) endif() # For some files currently building RISCV64 might be too slow. TODO: Improve compilation times per file diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index bea2e99fa51..f20b9daf22e 100644 --- a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -9,11 +9,18 @@ bool cgroupsV2Enabled() { #if defined(OS_LINUX) - /// This file exists iff the host has cgroups v2 enabled. - auto controllers_file = default_cgroups_mount / "cgroup.controllers"; - if (!std::filesystem::exists(controllers_file)) - return false; - return true; + try + { + /// This file exists iff the host has cgroups v2 enabled. + auto controllers_file = default_cgroups_mount / "cgroup.controllers"; + if (!std::filesystem::exists(controllers_file)) + return false; + return true; + } + catch (const std::filesystem::filesystem_error &) /// all "underlying OS API errors", typically: permission denied + { + return false; /// not logging the exception as most callers fall back to cgroups v1 + } #else return false; #endif diff --git a/contrib/libunwind b/contrib/libunwind index 854538ce337..d6a01c46327 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 854538ce337d631b619010528adff22cd58f9dce +Subproject commit d6a01c46327e56fd86beb8aaa31591fcd9a6b7df diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 2ba50b39934..46c24ad8491 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -22,7 +22,7 @@ description: In order to effectively mitigate possible human errors, you should TEMPORARY TABLE table_name [AS table_name_in_backup] | VIEW view_name [AS view_name_in_backup] ALL TEMPORARY TABLES [EXCEPT ...] | - ALL DATABASES [EXCEPT ...] } [,...] + ALL [EXCEPT ...] } [,...] [ON CLUSTER 'cluster_name'] TO|FROM File('/') | Disk('', '/') | S3('/', '', '') [SETTINGS base_backup = File('/') | Disk(...) | S3('/', '', '')] diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 7005783dd60..9b316960750 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -7,27 +7,27 @@ title: "External Disks for Storing Data" Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely. 
Various storages are supported: 1. [Amazon S3](https://aws.amazon.com/s3/) object storage. -2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)) -3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). +2. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). +3. Unsupported: The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)) :::note ClickHouse also has support for external table engines, which are different from external storage option described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables. 1. to work with data stored on `Amazon S3` disks, use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine. -2. to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine. -3. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine. +2. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine. +3. Unsupported: to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine. ::: ## Configuring external storage {#configuring-external-storage} -[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. +[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` (unsupported) using a disk with types `s3`, `azure_blob_storage`, `hdfs` (unsupported) accordingly. Disk configuration requires: -1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`. +1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs` (unsupported), `local_blob_storage`, `web`. 2. Configuration of a specific external storage type. Starting from 24.1 clickhouse version, it is possible to use a new configuration option. It requires to specify: 1. `type` equal to `object_storage` -2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`. +2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs` (unsupported), `local_blob_storage` (or just `local` from `24.3`), `web`. Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web` and, starting from `24.4`, `plain_rewritable`. 
Usage of `plain` metadata type is described in [plain storage section](/docs/en/operations/storing-data.md/#storing-data-on-webserver), `web` metadata type can be used only with `web` object storage type, `local` metadata type stores metadata files locally (each metadata files contains mapping to files in object storage and some additional meta information about them). @@ -328,7 +328,7 @@ Configuration: ``` -Starting from `24.1` it is possible configure any object storage disk (`s3`, `azure`, `hdfs`, `local`) using `plain` metadata type. +Starting from `24.1` it is possible to configure any object storage disk (`s3`, `azure`, `hdfs` (unsupported), `local`) using `plain` metadata type. Configuration: ``` xml @@ -428,12 +428,14 @@ Examples of working configurations can be found in integration tests directory ( Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: -## Using HDFS storage {#hdfs-storage} +## Using HDFS storage (Unsupported) {#hdfs-storage} In this sample configuration: -- the disk is of type `hdfs` +- the disk is of type `hdfs` (unsupported) - the data is hosted at `hdfs://hdfs1:9000/clickhouse/` +Note that HDFS is unsupported, so there may be issues when using it. Feel free to make a pull request with a fix if any issue arises. + ```xml @@ -464,9 +466,11 @@ In this sample configuration: ``` +Keep in mind that HDFS may not work in corner cases. + ### Using Data Encryption {#encrypted-virtual-file-system} -You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one. +You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) (unsupported) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one. Example of disk configuration: @@ -529,7 +533,7 @@ Example of disk configuration: It is possible to configure local cache over disks in storage configuration starting from version 22.3. For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc. -For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS. +For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS (unsupported). Cache uses `LRU` cache policy. @@ -971,7 +975,7 @@ Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#htt ### Zero-copy Replication (not ready for production) {#zero-copy} -Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks.
Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. +Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` (unsupported) disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. :::note Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. diff --git a/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md new file mode 100644 index 00000000000..d9b44b3ff07 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md @@ -0,0 +1,45 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/analysis_of_variance +sidebar_position: 6 +--- + +# analysisOfVariance + +Provides a statistical test for one-way analysis of variance (ANOVA test). It is a test over several groups of normally distributed observations to find out whether all groups have the same mean or not. + +**Syntax** + +```sql +analysisOfVariance(val, group_no) +``` + +Aliases: `anova` + +**Parameters** +- `val`: value. +- `group_no` : group number that `val` belongs to. + +:::note +Groups are enumerated starting from 0 and there should be at least two groups to perform a test. +There should be at least one group with the number of observations greater than one. +::: + +**Returned value** + +- `(f_statistic, p_value)`. [Tuple](../../data-types/tuple.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)). 
+ +**Example** + +Query: + +```sql +SELECT analysisOfVariance(number, number % 2) FROM numbers(1048575); +``` + +Result: + +```response +┌─analysisOfVariance(number, modulo(number, 2))─┐ +│ (0,1) │ +└───────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index e9a7fe4fc2b..451ee2aae9d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -37,6 +37,7 @@ Standard aggregate functions: ClickHouse-specific aggregate functions: +- [analysisOfVariance](/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md) - [any](/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md) - [anyHeavy](/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md) - [anyLast](/docs/en/sql-reference/aggregate-functions/reference/anylast.md) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 0d91de2dad8..4640882f2be 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -162,7 +162,7 @@ if (ARCH_AMD64 AND OS_LINUX AND NOT OS_ANDROID) set (HARMFUL_LIB harmful) endif () -target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils ${HARMFUL_LIB}) +target_link_libraries (clickhouse PRIVATE clickhouse_common_io ${HARMFUL_LIB}) target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) if (ENABLE_CLICKHOUSE_KEEPER) diff --git a/programs/client/CMakeLists.txt b/programs/client/CMakeLists.txt index e160355ef7b..f8ef8ccaf65 100644 --- a/programs/client/CMakeLists.txt +++ b/programs/client/CMakeLists.txt @@ -10,7 +10,6 @@ set (CLICKHOUSE_CLIENT_LINK clickhouse_common_io clickhouse_functions clickhouse_parsers - string_utils ) if (TARGET ch_rust::skim) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index d4b975ce1e8..1b91e7ceaf3 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index eaf85df67b1..5430c4b0a42 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/programs/keeper/clickhouse-keeper.cpp b/programs/keeper/clickhouse-keeper.cpp index be2686d936b..f2f91930ac0 100644 --- a/programs/keeper/clickhouse-keeper.cpp +++ b/programs/keeper/clickhouse-keeper.cpp @@ -1,4 +1,4 @@ -#include +#include #include "config_tools.h" diff --git a/programs/library-bridge/ExternalDictionaryLibraryUtils.h b/programs/library-bridge/ExternalDictionaryLibraryUtils.h index e6bf8f2a4c3..2eb44022742 100644 --- a/programs/library-bridge/ExternalDictionaryLibraryUtils.h +++ b/programs/library-bridge/ExternalDictionaryLibraryUtils.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/programs/main.cpp b/programs/main.cpp index 4bb73399719..bc8476e4ce4 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -15,7 +15,7 @@ #include "config_tools.h" -#include +#include #include #include diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 8035f053b41..688ae1a1143 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -19,7 +19,7 @@ #include #include #include -#include 
+#include #include #include #include diff --git a/programs/odbc-bridge/validateODBCConnectionString.cpp b/programs/odbc-bridge/validateODBCConnectionString.cpp index 6c6e11162b4..72c3c9bddca 100644 --- a/programs/odbc-bridge/validateODBCConnectionString.cpp +++ b/programs/odbc-bridge/validateODBCConnectionString.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include "validateODBCConnectionString.h" diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index 81440b03690..76d201cc924 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -13,7 +13,6 @@ set (CLICKHOUSE_SERVER_LINK clickhouse_parsers clickhouse_storages_system clickhouse_table_functions - string_utils ${LINK_RESOURCE_LIB} diff --git a/src/Access/User.cpp b/src/Access/User.cpp index ef5cf722113..6a296706baf 100644 --- a/src/Access/User.cpp +++ b/src/Access/User.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 908ff780c62..1f9a977bab6 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.cpp b/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.cpp index a42e4177ac5..428f7168826 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.cpp +++ b/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.cpp @@ -1,6 +1,6 @@ #include "AggregateFunctionCombinatorFactory.h" -#include +#include namespace DB { diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 8c0989b8202..8f32c918c61 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp index 6da2b7c06da..c25d7bd2fed 100644 --- a/src/Bridge/IBridge.cpp +++ b/src/Bridge/IBridge.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index da17bc1f41f..4e8946facda 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -307,7 +307,6 @@ endif() target_link_libraries (clickhouse_common_io PRIVATE - string_utils widechar_width ${LINK_LIBRARIES_ONLY_ON_X86_64} PUBLIC @@ -320,7 +319,6 @@ target_link_libraries (clickhouse_common_io target_link_libraries (clickhouse_compression PUBLIC - string_utils pcg_random clickhouse_parsers PRIVATE @@ -410,7 +408,6 @@ dbms_target_link_libraries ( clickhouse_parsers ch_contrib::lz4 Poco::JSON - string_utils PUBLIC boost::system clickhouse_common_io @@ -645,7 +642,6 @@ if (ENABLE_TESTS) dbms clickhouse_common_config clickhouse_common_zookeeper - string_utils hilite_comparator) if (TARGET ch_contrib::simdjson) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 4441d884754..b6f821794f1 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 6e626c22527..19cd8cc4ee5 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include 
diff --git a/src/Common/CMakeLists.txt b/src/Common/CMakeLists.txt index b83c8431f0a..d4802c28f53 100644 --- a/src/Common/CMakeLists.txt +++ b/src/Common/CMakeLists.txt @@ -1,5 +1,3 @@ -add_subdirectory(StringUtils) - if (ENABLE_BENCHMARKS) add_subdirectory(benchmarks) endif() diff --git a/src/Common/Config/CMakeLists.txt b/src/Common/Config/CMakeLists.txt index 009e2456322..09095ef5acc 100644 --- a/src/Common/Config/CMakeLists.txt +++ b/src/Common/Config/CMakeLists.txt @@ -13,8 +13,6 @@ target_link_libraries(clickhouse_common_config clickhouse_common_zookeeper common Poco::XML - PRIVATE - string_utils ) add_library(clickhouse_common_config_no_zookeeper_log ${SRCS}) @@ -23,8 +21,6 @@ target_link_libraries(clickhouse_common_config_no_zookeeper_log clickhouse_common_zookeeper_no_log common Poco::XML - PRIVATE - string_utils ) if (TARGET ch_contrib::yaml_cpp) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 7930ef20153..c9832e8efd5 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Common/FrequencyHolder.h b/src/Common/FrequencyHolder.h index 64207dc5423..d6c32c225bf 100644 --- a/src/Common/FrequencyHolder.h +++ b/src/Common/FrequencyHolder.h @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Common/HTTPHeaderFilter.cpp b/src/Common/HTTPHeaderFilter.cpp index 9ad8dd6fccf..fd02fe1ecef 100644 --- a/src/Common/HTTPHeaderFilter.cpp +++ b/src/Common/HTTPHeaderFilter.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d3525010419..8c8e2163aad 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -360,6 +360,7 @@ The server successfully detected this situation and will download merged part fr M(QueryProfilerSignalOverruns, "Number of times we drop processing of a query profiler signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \ M(QueryProfilerConcurrencyOverruns, "Number of times we drop processing of a query profiler signal due to too many concurrent query profilers in other threads, which may indicate overload.") \ M(QueryProfilerRuns, "Number of times QueryProfiler had been run.") \ + M(QueryProfilerErrors, "Number of invalid memory accesses detected during asynchronous stack unwinding.") \ \ M(CreatedLogEntryForMerge, "Successfully created log entry to merge parts in ReplicatedMergeTree.") \ M(NotCreatedLogEntryForMerge, "Log entry to merge parts in ReplicatedMergeTree is not created due to concurrent log update by another replica.") \ diff --git a/src/Common/ProxyConfigurationResolverProvider.cpp b/src/Common/ProxyConfigurationResolverProvider.cpp index d15b4d98615..1a6dc1090ee 100644 --- a/src/Common/ProxyConfigurationResolverProvider.cpp +++ b/src/Common/ProxyConfigurationResolverProvider.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Common/ProxyListConfigurationResolver.cpp b/src/Common/ProxyListConfigurationResolver.cpp index c9b8923929a..c527c89ea6b 100644 --- a/src/Common/ProxyListConfigurationResolver.cpp +++ b/src/Common/ProxyListConfigurationResolver.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 4f72b4aba75..c3affbdd968 100644 ---
a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -12,7 +12,6 @@ #include #include -#include namespace CurrentMetrics { @@ -25,6 +24,7 @@ namespace ProfileEvents extern const Event QueryProfilerSignalOverruns; extern const Event QueryProfilerConcurrencyOverruns; extern const Event QueryProfilerRuns; + extern const Event QueryProfilerErrors; } namespace DB @@ -84,11 +84,29 @@ namespace #endif const auto signal_context = *reinterpret_cast(context); - const StackTrace stack_trace(signal_context); + std::optional stack_trace; + +#if defined(SANITIZER) + constexpr bool sanitizer = true; +#else + constexpr bool sanitizer = false; +#endif + + asynchronous_stack_unwinding = true; + if (sanitizer || 0 == sigsetjmp(asynchronous_stack_unwinding_signal_jump_buffer, 1)) + { + stack_trace.emplace(signal_context); + } + else + { + ProfileEvents::incrementNoTrace(ProfileEvents::QueryProfilerErrors); + } + asynchronous_stack_unwinding = false; + + if (stack_trace) + TraceSender::send(trace_type, *stack_trace, {}); - TraceSender::send(trace_type, stack_trace, {}); ProfileEvents::incrementNoTrace(ProfileEvents::QueryProfilerRuns); - errno = saved_errno; } diff --git a/src/Common/RemoteHostFilter.cpp b/src/Common/RemoteHostFilter.cpp index 815be8902e6..fe7bf878596 100644 --- a/src/Common/RemoteHostFilter.cpp +++ b/src/Common/RemoteHostFilter.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp b/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp index b568b9245ba..01aa7df48d3 100644 --- a/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp +++ b/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include diff --git a/src/Common/SensitiveDataMasker.cpp b/src/Common/SensitiveDataMasker.cpp index 8c29b899841..a9f61a1c786 100644 --- a/src/Common/SensitiveDataMasker.cpp +++ b/src/Common/SensitiveDataMasker.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #ifndef NDEBUG diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 6e6f5b42b36..239e957bdfe 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -560,3 +560,7 @@ void StackTrace::dropCache() std::lock_guard lock{stacktrace_cache_mutex}; cacheInstance().clear(); } + + +thread_local bool asynchronous_stack_unwinding = false; +thread_local sigjmp_buf asynchronous_stack_unwinding_signal_jump_buffer; diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index 06028c77034..4ce9a9281f3 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef OS_DARWIN // ucontext is not available without _XOPEN_SOURCE @@ -87,3 +88,8 @@ protected: }; std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext_t & context); + +/// Special handling for errors during asynchronous stack unwinding, +/// which is used in Query Profiler +extern thread_local bool asynchronous_stack_unwinding; +extern thread_local sigjmp_buf asynchronous_stack_unwinding_signal_jump_buffer; diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index b3065354f65..d7e706fcd80 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Common/StringUtils.cpp b/src/Common/StringUtils.cpp new file mode 100644 index 00000000000..18577e64c01
--- /dev/null +++ b/src/Common/StringUtils.cpp @@ -0,0 +1,87 @@ +#include + +#include + +#if USE_MULTITARGET_CODE +#include +#endif + + +namespace impl +{ + +bool startsWith(const std::string & s, const char * prefix, size_t prefix_size) +{ + return s.size() >= prefix_size && 0 == memcmp(s.data(), prefix, prefix_size); +} + +bool endsWith(const std::string & s, const char * suffix, size_t suffix_size) +{ + return s.size() >= suffix_size && 0 == memcmp(s.data() + s.size() - suffix_size, suffix, suffix_size); +} + +} + +DECLARE_DEFAULT_CODE( +bool isAllASCII(const UInt8 * data, size_t size) +{ + UInt8 mask = 0; + for (size_t i = 0; i < size; ++i) + mask |= data[i]; + + return !(mask & 0x80); +}) + +DECLARE_SSE42_SPECIFIC_CODE( +/// Copy from https://github.com/lemire/fastvalidate-utf-8/blob/master/include/simdasciicheck.h +bool isAllASCII(const UInt8 * data, size_t size) +{ + __m128i masks = _mm_setzero_si128(); + + size_t i = 0; + for (; i + 16 <= size; i += 16) + { + __m128i bytes = _mm_loadu_si128(reinterpret_cast(data + i)); + masks = _mm_or_si128(masks, bytes); + } + int mask = _mm_movemask_epi8(masks); + + UInt8 tail_mask = 0; + for (; i < size; i++) + tail_mask |= data[i]; + + mask |= (tail_mask & 0x80); + return !mask; +}) + +DECLARE_AVX2_SPECIFIC_CODE( +bool isAllASCII(const UInt8 * data, size_t size) +{ + __m256i masks = _mm256_setzero_si256(); + + size_t i = 0; + for (; i + 32 <= size; i += 32) + { + __m256i bytes = _mm256_loadu_si256(reinterpret_cast(data + i)); + masks = _mm256_or_si256(masks, bytes); + } + int mask = _mm256_movemask_epi8(masks); + + UInt8 tail_mask = 0; + for (; i < size; i++) + tail_mask |= data[i]; + + mask |= (tail_mask & 0x80); + return !mask; +}) + +bool isAllASCII(const UInt8 * data, size_t size) +{ +#if USE_MULTITARGET_CODE + if (isArchSupported(DB::TargetArch::AVX2)) + return TargetSpecific::AVX2::isAllASCII(data, size); + if (isArchSupported(DB::TargetArch::SSE42)) + return TargetSpecific::SSE42::isAllASCII(data, size); +#endif + return TargetSpecific::Default::isAllASCII(data, size); +} diff --git a/src/Common/StringUtils/StringUtils.h b/src/Common/StringUtils.h similarity index 95% rename from src/Common/StringUtils/StringUtils.h rename to src/Common/StringUtils.h index 4958ecc9476..fe5fc3c058f 100644 --- a/src/Common/StringUtils/StringUtils.h +++ b/src/Common/StringUtils.h @@ -7,8 +7,10 @@ #include #include +#include -namespace detail + +namespace impl { bool startsWith(const std::string & s, const char * prefix, size_t prefix_size); bool endsWith(const std::string & s, const char * suffix, size_t suffix_size); @@ -17,12 +19,12 @@ namespace detail inline bool startsWith(const std::string & s, const std::string & prefix) { - return detail::startsWith(s, prefix.data(), prefix.size()); + return impl::startsWith(s, prefix.data(), prefix.size()); } inline bool endsWith(const std::string & s, const std::string & suffix) { - return detail::endsWith(s, suffix.data(), suffix.size()); + return impl::endsWith(s, suffix.data(), suffix.size()); } @@ -30,12 +32,12 @@ inline bool endsWith(const std::string & s, const std::string & suffix) /// string that is known at compile time. 
inline bool startsWith(const std::string & s, const char * prefix) { - return detail::startsWith(s, prefix, strlen(prefix)); + return impl::startsWith(s, prefix, strlen(prefix)); } inline bool endsWith(const std::string & s, const char * suffix) { - return detail::endsWith(s, suffix, strlen(suffix)); + return impl::endsWith(s, suffix, strlen(suffix)); } /// Given an integer, return the adequate suffix for @@ -315,6 +317,9 @@ inline void trim(std::string & str, char c = ' ') trimLeft(str, c); } +/// If all characters in the string are ASCII, return true +bool isAllASCII(const UInt8 * data, size_t size); + constexpr bool containsGlobs(const std::string & str) { return str.find_first_of("*?{") != std::string::npos; diff --git a/src/Common/StringUtils/CMakeLists.txt b/src/Common/StringUtils/CMakeLists.txt deleted file mode 100644 index 57c196d335c..00000000000 --- a/src/Common/StringUtils/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# These files are located in separate library, because they are used by separate products -# in places when no dependency on whole "dbms" library is possible. - -include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") - -add_headers_and_sources(clickhouse_common_stringutils .) - -add_library(string_utils ${clickhouse_common_stringutils_headers} ${clickhouse_common_stringutils_sources}) diff --git a/src/Common/StringUtils/StringUtils.cpp b/src/Common/StringUtils/StringUtils.cpp deleted file mode 100644 index 8a0b25dbdad..00000000000 --- a/src/Common/StringUtils/StringUtils.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "StringUtils.h" - - -namespace detail -{ - -bool startsWith(const std::string & s, const char * prefix, size_t prefix_size) -{ - return s.size() >= prefix_size && 0 == memcmp(s.data(), prefix, prefix_size); -} - -bool endsWith(const std::string & s, const char * suffix, size_t suffix_size) -{ - return s.size() >= suffix_size && 0 == memcmp(s.data() + s.size() - suffix_size, suffix, suffix_size); -} - -} diff --git a/src/Common/TLDListsHolder.cpp b/src/Common/TLDListsHolder.cpp index c3991b86983..413d221090e 100644 --- a/src/Common/TLDListsHolder.cpp +++ b/src/Common/TLDListsHolder.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Common/UTF8Helpers.cpp b/src/Common/UTF8Helpers.cpp index b8f5c000e75..8c8c8e8327b 100644 --- a/src/Common/UTF8Helpers.cpp +++ b/src/Common/UTF8Helpers.cpp @@ -1,14 +1,9 @@ -#include -#include #include +#include #include #include -#if USE_MULTITARGET_CODE -#include -#endif - namespace DB { namespace UTF8 @@ -208,7 +203,6 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l } - size_t computeWidth(const UInt8 * data, size_t size, size_t prefix) noexcept { return computeWidthImpl(data, size, prefix, 0); @@ -219,71 +213,5 @@ size_t computeBytesBeforeWidth(const UInt8 * data, size_t size, size_t prefix, s return computeWidthImpl(data, size, prefix, limit); } - -DECLARE_DEFAULT_CODE( -bool isAllASCII(const UInt8 * data, size_t size) -{ - UInt8 mask = 0; - for (size_t i = 0; i < size; ++i) - mask |= data[i]; - - return !(mask & 0x80); -}) - -DECLARE_SSE42_SPECIFIC_CODE( -/// Copy from https://github.com/lemire/fastvalidate-utf-8/blob/master/include/simdasciicheck.h -bool isAllASCII(const UInt8 * data, size_t size) -{ - __m128i masks = _mm_setzero_si128(); - - size_t i = 0; - for (; i + 16 <= size; i += 16) - { - __m128i bytes = _mm_loadu_si128(reinterpret_cast(data + i)); - masks = _mm_or_si128(masks, bytes); - } - int mask = 
_mm_movemask_epi8(masks); - - UInt8 tail_mask = 0; - for (; i < size; i++) - tail_mask |= data[i]; - - mask |= (tail_mask & 0x80); - return !mask; -}) - -DECLARE_AVX2_SPECIFIC_CODE( -bool isAllASCII(const UInt8 * data, size_t size) -{ - __m256i masks = _mm256_setzero_si256(); - - size_t i = 0; - for (; i + 32 <= size; i += 32) - { - __m256i bytes = _mm256_loadu_si256(reinterpret_cast(data + i)); - masks = _mm256_or_si256(masks, bytes); - } - int mask = _mm256_movemask_epi8(masks); - - UInt8 tail_mask = 0; - for (; i < size; i++) - tail_mask |= data[i]; - - mask |= (tail_mask & 0x80); - return !mask; -}) - -bool isAllASCII(const UInt8* data, size_t size) -{ -#if USE_MULTITARGET_CODE - if (isArchSupported(TargetArch::AVX2)) - return TargetSpecific::AVX2::isAllASCII(data, size); - if (isArchSupported(TargetArch::SSE42)) - return TargetSpecific::SSE42::isAllASCII(data, size); -#endif - return TargetSpecific::Default::isAllASCII(data, size); -} - - } } diff --git a/src/Common/UTF8Helpers.h b/src/Common/UTF8Helpers.h index 933b62c7b63..b09d92bd731 100644 --- a/src/Common/UTF8Helpers.h +++ b/src/Common/UTF8Helpers.h @@ -136,10 +136,6 @@ size_t computeWidth(const UInt8 * data, size_t size, size_t prefix = 0) noexcept */ size_t computeBytesBeforeWidth(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept; - -/// If all the characters in the string are ASCII, return true. -bool isAllASCII(const UInt8* data, size_t size); - } } diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h index 6513bdb8bc3..3a148983790 100644 --- a/src/Common/Volnitsky.h +++ b/src/Common/Volnitsky.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Common/ZooKeeper/CMakeLists.txt b/src/Common/ZooKeeper/CMakeLists.txt index aa06375bd6a..8b6c420e565 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -12,8 +12,6 @@ target_link_libraries (clickhouse_common_zookeeper clickhouse_common_io clickhouse_compression common - PRIVATE - string_utils ) # for examples -- no logging (to avoid extra dependencies) @@ -23,8 +21,6 @@ target_link_libraries (clickhouse_common_zookeeper_no_log clickhouse_common_io clickhouse_compression common - PRIVATE - string_utils ) if (ENABLE_EXAMPLES) add_subdirectory(examples) diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp index d02ad4523ad..51ad2e7c830 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -1,7 +1,7 @@ #include "Common/ZooKeeper/IKeeper.h" #include #include -#include +#include #include #include diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index c62c5d0c143..be490d0bfc1 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -19,7 +19,7 @@ #include #include "Common/ZooKeeper/IKeeper.h" #include -#include +#include #include #include diff --git a/src/Common/ZooKeeper/ZooKeeperArgs.cpp b/src/Common/ZooKeeper/ZooKeeperArgs.cpp index 40bd9d79a03..a581b6a7f38 100644 --- a/src/Common/ZooKeeper/ZooKeeperArgs.cpp +++ b/src/Common/ZooKeeper/ZooKeeperArgs.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Common/ZooKeeper/examples/CMakeLists.txt b/src/Common/ZooKeeper/examples/CMakeLists.txt index c5a93f2701e..678b302a512 100644 --- a/src/Common/ZooKeeper/examples/CMakeLists.txt +++ b/src/Common/ZooKeeper/examples/CMakeLists.txt @@ -7,7 +7,6 @@ 
clickhouse_add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_ target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper_no_log clickhouse_compression - string_utils dbms) clickhouse_add_executable(zkutil_test_async zkutil_test_async.cpp) diff --git a/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp b/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp index 414006d48a4..25d66b94b46 100644 --- a/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp +++ b/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Common/escapeForFileName.cpp b/src/Common/escapeForFileName.cpp index a1f9bff28d0..2fe23245f49 100644 --- a/src/Common/escapeForFileName.cpp +++ b/src/Common/escapeForFileName.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include namespace DB diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 90a238c9800..73e1396fb35 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -87,3 +87,6 @@ if (ENABLE_SSL) clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp) target_link_libraries (encrypt_decrypt PRIVATE dbms) endif() + +clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp) +target_link_libraries (check_pointer_valid PRIVATE clickhouse_common_io) diff --git a/src/Common/examples/check_pointer_valid.cpp b/src/Common/examples/check_pointer_valid.cpp new file mode 100644 index 00000000000..e59ebf43327 --- /dev/null +++ b/src/Common/examples/check_pointer_valid.cpp @@ -0,0 +1,53 @@ +#include +#include +#include +#include + + +/// This example demonstrates how to check whether a pointer to memory is readable using a signal handler.
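+/// The idea: isPointerValid() arms a thread-local flag and a sigsetjmp() recovery point, then reads one byte through the pointer. +/// If the read faults, the SIGSEGV handler siglongjmps back, sigsetjmp() returns non-zero, and the function reports false.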
+ +thread_local bool checking_pointer = false; +thread_local jmp_buf signal_jump_buffer; + + +void signalHandler(int sig, siginfo_t *, void *) +{ + if (checking_pointer && sig == SIGSEGV) + siglongjmp(signal_jump_buffer, 1); +} + +bool isPointerValid(const void * ptr) +{ + checking_pointer = true; + if (0 == sigsetjmp(signal_jump_buffer, 1)) + { + char res; + memcpy(&res, ptr, 1); + __asm__ __volatile__("" :: "r"(res) : "memory"); + checking_pointer = false; + return true; + } + else + { + checking_pointer = false; + return false; + } +} + +int main(int, char **) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = signalHandler; + sa.sa_flags = SA_SIGINFO; + + if (sigemptyset(&sa.sa_mask) + || sigaddset(&sa.sa_mask, SIGSEGV) + || sigaction(SIGSEGV, &sa, nullptr)) + return 1; + + std::cerr << isPointerValid(reinterpret_cast(0x123456789)) << "\n"; + std::cerr << isPointerValid(&sa) << "\n"; + + return 0; +} diff --git a/src/Common/format.h b/src/Common/format.h index 27018f64064..3dbb88b4089 100644 --- a/src/Common/format.h +++ b/src/Common/format.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Common/formatIPv6.cpp b/src/Common/formatIPv6.cpp index 86e33beb7c3..341b3715d30 100644 --- a/src/Common/formatIPv6.cpp +++ b/src/Common/formatIPv6.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index 3451eda6b3c..bb83e0381ef 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include constexpr size_t IPV4_BINARY_LENGTH = 4; constexpr size_t IPV6_BINARY_LENGTH = 16; diff --git a/src/Common/getMappedArea.cpp b/src/Common/getMappedArea.cpp index 4f40c604c6a..79191d68fb9 100644 --- a/src/Common/getMappedArea.cpp +++ b/src/Common/getMappedArea.cpp @@ -3,7 +3,7 @@ #if defined(OS_LINUX) -#include +#include #include #include #include diff --git a/src/Common/getMultipleKeysFromConfig.cpp b/src/Common/getMultipleKeysFromConfig.cpp index 7cf49fcc34d..6d6589a45a3 100644 --- a/src/Common/getMultipleKeysFromConfig.cpp +++ b/src/Common/getMultipleKeysFromConfig.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include namespace DB { diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 25254e10441..28902bc8591 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include "Coordination/KeeperFeatureFlags.h" #include diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index df5c2e9e0c8..9bcd0608bf7 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index fdffca9b4ef..c6c82df2a72 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -144,6 +144,9 @@ static std::atomic_flag fatal_error_printed; */ static void signalHandler(int sig, siginfo_t * info, void * context) { + if (asynchronous_stack_unwinding && sig == SIGSEGV) + siglongjmp(asynchronous_stack_unwinding_signal_jump_buffer, 1); + DENY_ALLOCATIONS_IN_SCOPE; auto saved_errno = errno; /// We must restore previous value of errno in signal handler. 
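+    /// Note: the matching sigsetjmp() is in QueryProfiler.cpp; the profiler sets asynchronous_stack_unwinding around stack +    /// collection and counts ProfileEvents::QueryProfilerErrors when this handler jumps back into it.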
@@ -185,6 +188,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context) errno = saved_errno; } + static bool getenvBool(const char * name) { bool res = false; diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 844384f3c95..427af090b91 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index d9f70e1659d..4d7ab63f966 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 5bbd79160d4..ebee096613d 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index 6a56f885503..650559d21d9 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include #include "Columns/IColumn.h" #include diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 7b6f87baf2e..49bc89687f1 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Dictionaries/CMakeLists.txt b/src/Dictionaries/CMakeLists.txt index 569acd9231a..783835356e6 100644 --- a/src/Dictionaries/CMakeLists.txt +++ b/src/Dictionaries/CMakeLists.txt @@ -39,7 +39,6 @@ target_link_libraries(clickhouse_dictionaries Poco::Data Poco::MongoDB Poco::Redis - string_utils ) target_link_libraries(clickhouse_dictionaries PUBLIC ch_contrib::abseil_swiss_tables) diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index 0b6bdea60a3..c2f2f4a8532 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include diff --git a/src/Dictionaries/FileDictionarySource.cpp b/src/Dictionaries/FileDictionarySource.cpp index 16a4ecaee75..fde46fb27f0 100644 --- a/src/Dictionaries/FileDictionarySource.cpp +++ b/src/Dictionaries/FileDictionarySource.cpp @@ -1,6 +1,6 @@ #include "FileDictionarySource.h" #include -#include +#include #include #include #include diff --git a/src/Disks/IVolume.cpp b/src/Disks/IVolume.cpp index d763c55c4aa..e6be0f36193 100644 --- a/src/Disks/IVolume.cpp +++ b/src/Disks/IVolume.cpp @@ -1,6 +1,6 @@ #include "IVolume.h" -#include +#include #include #include diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 043e5b8ef8c..adbdd9d13aa 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index c3114eb0b6f..35913613326 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -4,7 +4,7 @@ #if USE_AWS_S3 -#include +#include #include #include #include diff 
--git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index a0c71583a22..d0e9d32ff5e 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -1,6 +1,6 @@ #include "VolumeJBOD.h" -#include +#include #include #include #include diff --git a/src/Formats/CapnProtoSchema.cpp b/src/Formats/CapnProtoSchema.cpp index 559047a6f8d..6076dae4157 100644 --- a/src/Formats/CapnProtoSchema.cpp +++ b/src/Formats/CapnProtoSchema.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Formats/StructureToCapnProtoSchema.cpp b/src/Formats/StructureToCapnProtoSchema.cpp index 99298fadee1..cd45b19d3c0 100644 --- a/src/Formats/StructureToCapnProtoSchema.cpp +++ b/src/Formats/StructureToCapnProtoSchema.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Formats/StructureToProtobufSchema.cpp b/src/Formats/StructureToProtobufSchema.cpp index 178c0ae3cc2..9fd02969adb 100644 --- a/src/Formats/StructureToProtobufSchema.cpp +++ b/src/Formats/StructureToProtobufSchema.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Functions/ExtractString.h b/src/Functions/ExtractString.h index aa0e1b04835..6beb8be830a 100644 --- a/src/Functions/ExtractString.h +++ b/src/Functions/ExtractString.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index c80152bc71d..d6cf6a24983 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/FunctionsProgrammingClassification.cpp b/src/Functions/FunctionsProgrammingClassification.cpp index a93e1d9a87d..c01e47ad0d7 100644 --- a/src/Functions/FunctionsProgrammingClassification.cpp +++ b/src/Functions/FunctionsProgrammingClassification.cpp @@ -2,7 +2,7 @@ #if USE_NLP -#include +#include #include #include diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp index 3de38d99c88..a9321819a26 100644 --- a/src/Functions/FunctionsTonalityClassification.cpp +++ b/src/Functions/FunctionsTonalityClassification.cpp @@ -2,7 +2,7 @@ #if USE_NLP -#include +#include #include #include diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp index 6d5e37623e9..fad822379d4 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index bb794a0f8ed..eebba7b9d5f 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #ifdef __SSE2__ @@ -94,7 +95,7 @@ struct LowerUpperUTF8Impl if (data.empty()) return; - bool all_ascii = UTF8::isAllASCII(data.data(), data.size()); + bool all_ascii = isAllASCII(data.data(), data.size()); if (all_ascii) { LowerUpperImpl::vector(data, offsets, res_data, res_offsets); diff --git a/src/Functions/URL/domain.h b/src/Functions/URL/domain.h index 87f5aeffda7..936fb9d5f00 100644 --- a/src/Functions/URL/domain.h +++ b/src/Functions/URL/domain.h @@ -3,7 +3,7 @@ #include "protocol.h" #include #include -#include 
+#include namespace DB { diff --git a/src/Functions/URL/netloc.cpp b/src/Functions/URL/netloc.cpp index abfa7ec26fd..d1ca4fa1614 100644 --- a/src/Functions/URL/netloc.cpp +++ b/src/Functions/URL/netloc.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/Functions/URL/port.cpp b/src/Functions/URL/port.cpp index 942f6b702fd..c8f50f10a56 100644 --- a/src/Functions/URL/port.cpp +++ b/src/Functions/URL/port.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Functions/URL/protocol.h b/src/Functions/URL/protocol.h index c1d83192835..5e90f538ff1 100644 --- a/src/Functions/URL/protocol.h +++ b/src/Functions/URL/protocol.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include diff --git a/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp index a4f17aa1201..2c031158c48 100644 --- a/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp +++ b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp @@ -1,7 +1,7 @@ #include "ExternalUserDefinedExecutableFunctionsLoader.h" #include -#include +#include #include diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp index d874612ad04..b406cc8d317 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp @@ -3,7 +3,7 @@ #include "Functions/UserDefined/UserDefinedSQLFunctionFactory.h" #include "Functions/UserDefined/UserDefinedSQLObjectType.h" -#include +#include #include #include #include diff --git a/src/Functions/alphaTokens.cpp b/src/Functions/alphaTokens.cpp index 35f434e7498..f4d77f1d654 100644 --- a/src/Functions/alphaTokens.cpp +++ b/src/Functions/alphaTokens.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include namespace DB diff --git a/src/Functions/arrayStringConcat.cpp b/src/Functions/arrayStringConcat.cpp index b787feeeca1..421408c01f2 100644 --- a/src/Functions/arrayStringConcat.cpp +++ b/src/Functions/arrayStringConcat.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include diff --git a/src/Functions/decodeHTMLComponent.cpp b/src/Functions/decodeHTMLComponent.cpp index 4db3c43f946..00a601b77a6 100644 --- a/src/Functions/decodeHTMLComponent.cpp +++ b/src/Functions/decodeHTMLComponent.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Functions/decodeXMLComponent.cpp b/src/Functions/decodeXMLComponent.cpp index a25e67e0e37..cbbe46fcb8c 100644 --- a/src/Functions/decodeXMLComponent.cpp +++ b/src/Functions/decodeXMLComponent.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/extractAll.cpp b/src/Functions/extractAll.cpp index f0c18bf79b9..5801a7b8f4f 100644 --- a/src/Functions/extractAll.cpp +++ b/src/Functions/extractAll.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/extractTextFromHTML.cpp b/src/Functions/extractTextFromHTML.cpp index 4eefeaa9f86..d9aa004b279 100644 --- a/src/Functions/extractTextFromHTML.cpp +++ b/src/Functions/extractTextFromHTML.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include /** A function to extract text from HTML or XHTML. 
diff --git a/src/Functions/initcap.cpp b/src/Functions/initcap.cpp index 5460ee06792..6b2958227bc 100644 --- a/src/Functions/initcap.cpp +++ b/src/Functions/initcap.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Functions/padString.cpp b/src/Functions/padString.cpp index 0922e0ddb8a..8670c837e21 100644 --- a/src/Functions/padString.cpp +++ b/src/Functions/padString.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -237,8 +238,8 @@ namespace void executeForSource(SourceStrings && strings, const ColumnPtr & column_length, const String & pad_string, StringSink & res_sink) const { const auto & chars = strings.getElements(); - bool all_ascii = UTF8::isAllASCII(reinterpret_cast(pad_string.data()), pad_string.size()) - && UTF8::isAllASCII(chars.data(), chars.size()); + bool all_ascii = isAllASCII(reinterpret_cast(pad_string.data()), pad_string.size()) + && isAllASCII(chars.data(), chars.size()); bool is_actually_utf8 = is_utf8 && !all_ascii; if (!is_actually_utf8) diff --git a/src/Functions/reverseUTF8.cpp b/src/Functions/reverseUTF8.cpp index 4ea861919a1..1aee349fa8d 100644 --- a/src/Functions/reverseUTF8.cpp +++ b/src/Functions/reverseUTF8.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "reverse.h" @@ -27,7 +28,7 @@ struct ReverseUTF8Impl ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { - bool all_ascii = UTF8::isAllASCII(data.data(), data.size()); + bool all_ascii = isAllASCII(data.data(), data.size()); if (all_ascii) { ReverseImpl::vector(data, offsets, res_data, res_offsets); diff --git a/src/Functions/soundex.cpp b/src/Functions/soundex.cpp index 0cddfc90f7c..77ddb14a6ec 100644 --- a/src/Functions/soundex.cpp +++ b/src/Functions/soundex.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB diff --git a/src/Functions/splitByChar.cpp b/src/Functions/splitByChar.cpp index d3d5dc9fe4a..52db5623b89 100644 --- a/src/Functions/splitByChar.cpp +++ b/src/Functions/splitByChar.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include diff --git a/src/Functions/splitByNonAlpha.cpp b/src/Functions/splitByNonAlpha.cpp index 4486a33aa88..17ff6cfb0a8 100644 --- a/src/Functions/splitByNonAlpha.cpp +++ b/src/Functions/splitByNonAlpha.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/splitByRegexp.cpp b/src/Functions/splitByRegexp.cpp index 430089f14ee..32afb813a04 100644 --- a/src/Functions/splitByRegexp.cpp +++ b/src/Functions/splitByRegexp.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include diff --git a/src/Functions/splitByString.cpp b/src/Functions/splitByString.cpp index 5c97f9841e7..e9b70a58eab 100644 --- a/src/Functions/splitByString.cpp +++ b/src/Functions/splitByString.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include diff --git a/src/Functions/splitByWhitespace.cpp b/src/Functions/splitByWhitespace.cpp index cf21a218b15..5bf27f64c17 100644 --- a/src/Functions/splitByWhitespace.cpp +++ b/src/Functions/splitByWhitespace.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB diff --git a/src/Functions/substring.cpp b/src/Functions/substring.cpp index 122f83d758b..f1dea7db018 100644 --- a/src/Functions/substring.cpp +++ b/src/Functions/substring.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -149,7 +150,7 @@ public: { if (const ColumnString * col = checkAndGetColumn(column_string.get())) { - 
bool all_ascii = UTF8::isAllASCII(col->getChars().data(), col->getChars().size()); + bool all_ascii = isAllASCII(col->getChars().data(), col->getChars().size()); if (all_ascii) return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, StringSource(*col), input_rows_count); else @@ -159,7 +160,7 @@ public: if (const ColumnConst * col_const = checkAndGetColumnConst(column_string.get())) { StringRef str_ref = col_const->getDataAt(0); - bool all_ascii = UTF8::isAllASCII(reinterpret_cast(str_ref.data), str_ref.size); + bool all_ascii = isAllASCII(reinterpret_cast(str_ref.data), str_ref.size); if (all_ascii) return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource(*col_const), input_rows_count); else diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index 74474cb4b23..15a321bd5b0 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -129,8 +130,8 @@ namespace res_data.reserve(str_column->getChars().size() / 2); res_offsets.reserve(rows); - bool all_ascii = UTF8::isAllASCII(str_column->getChars().data(), str_column->getChars().size()) - && UTF8::isAllASCII(reinterpret_cast(delim.data()), delim.size()); + bool all_ascii = isAllASCII(str_column->getChars().data(), str_column->getChars().size()) + && isAllASCII(reinterpret_cast(delim.data()), delim.size()); std::unique_ptr searcher = !is_utf8 || all_ascii ? nullptr : std::make_unique(delim.data(), delim.size()); @@ -162,8 +163,8 @@ namespace res_data.reserve(str_column->getChars().size() / 2); res_offsets.reserve(rows); - bool all_ascii = UTF8::isAllASCII(str_column->getChars().data(), str_column->getChars().size()) - && UTF8::isAllASCII(reinterpret_cast(delim.data()), delim.size()); + bool all_ascii = isAllASCII(str_column->getChars().data(), str_column->getChars().size()) + && isAllASCII(reinterpret_cast(delim.data()), delim.size()); std::unique_ptr searcher = !is_utf8 || all_ascii ? nullptr : std::make_unique(delim.data(), delim.size()); @@ -194,8 +195,8 @@ namespace res_data.reserve(str.size() * rows / 2); res_offsets.reserve(rows); - bool all_ascii = UTF8::isAllASCII(reinterpret_cast(str.data()), str.size()) - && UTF8::isAllASCII(reinterpret_cast(delim.data()), delim.size()); + bool all_ascii = isAllASCII(reinterpret_cast(str.data()), str.size()) + && isAllASCII(reinterpret_cast(delim.data()), delim.size()); std::unique_ptr searcher = !is_utf8 || all_ascii ? 
nullptr : std::make_unique(delim.data(), delim.size()); diff --git a/src/Functions/translate.cpp b/src/Functions/translate.cpp index c7173909029..2df08a5664e 100644 --- a/src/Functions/translate.cpp +++ b/src/Functions/translate.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/IO/HTTPChunkedReadBuffer.cpp b/src/IO/HTTPChunkedReadBuffer.cpp index 41788fa8ce7..b5ac6a9b728 100644 --- a/src/IO/HTTPChunkedReadBuffer.cpp +++ b/src/IO/HTTPChunkedReadBuffer.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 8c83eac5cff..b428b1c7d8a 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 5cf7d3e5b66..63bfae513e7 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -30,7 +30,7 @@ #include #include -#include +#include #include #include diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 56e3e0df21b..4583b2bb0ac 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include "config.h" diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index a30e2feb439..d4b2d8ea0dc 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 70401fdf72d..e046e837689 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index d1652784cc2..3a21d7201a9 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunneeded-internal-declaration" diff --git a/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp b/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp index 76979ed86c8..71fc1047cfa 100644 --- a/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index 1147d74c146..96d8e55a74c 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index ec6283df649..59c98491c14 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/ClusterDiscovery.cpp b/src/Interpreters/ClusterDiscovery.cpp index d432488964d..6f9c375c2f5 100644 --- a/src/Interpreters/ClusterDiscovery.cpp +++ b/src/Interpreters/ClusterDiscovery.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git 
a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 17a678ef9b8..d1701d268f1 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -40,7 +40,8 @@ namespace ErrorCodes namespace ClusterProxy { -ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, +ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster, + bool is_remote_function, ContextPtr context, const Settings & settings, const StorageID & main_table, @@ -48,9 +49,17 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, LoggerPtr log, const DistributedSettings * distributed_settings) { + ClientInfo new_client_info = context->getClientInfo(); Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); + /// In case of interserver mode we should reset initial_user for the remote() function to use the user passed in the query. + if (is_remote_function) + { + const auto & address = cluster.getShardsAddresses().front().front(); + new_client_info.initial_user = address.user; + } + /// If "secret" (in remote_servers) is not in use, /// user on the shard is not the same as the user on the initiator, /// hence per-user limits should not be applied. @@ -170,9 +179,23 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, auto new_context = Context::createCopy(context); new_context->setSettings(new_settings); + new_context->setClientInfo(new_client_info); return new_context; } +ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table) +{ + return updateSettingsAndClientInfoForCluster(cluster, + /* is_remote_function= */ false, + context, + settings, + main_table, + /* additional_filter_ast= */ {}, + /* log= */ {}, + /* distributed_settings= */ {}); +} + + static ThrottlerPtr getThrottler(const ContextPtr & context) { const Settings & settings = context->getSettingsRef(); @@ -211,7 +234,8 @@ void executeQuery( const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, const DistributedSettings & distributed_settings, - AdditionalShardFilterGenerator shard_filter_generator) + AdditionalShardFilterGenerator shard_filter_generator, + bool is_remote_function) { const Settings & settings = context->getSettingsRef(); @@ -224,8 +248,8 @@ void executeQuery( SelectStreamFactory::Shards remote_shards; auto cluster = query_info.getCluster(); - auto new_context = updateSettingsForCluster(*cluster, context, settings, main_table, query_info.additional_filter_ast, log, - &distributed_settings); + auto new_context = updateSettingsAndClientInfoForCluster(*cluster, is_remote_function, context, + settings, main_table, query_info.additional_filter_ast, log, &distributed_settings); if (context->getSettingsRef().allow_experimental_parallel_reading_from_replicas && context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value != new_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value) diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index c01fdb678ff..14178cc0bf1 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -44,13 +44,7 @@ class SelectStreamFactory; /// - optimize_skip_unused_shards_nesting /// /// @return new Context
with adjusted settings -ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, - ContextPtr context, - const Settings & settings, - const StorageID & main_table, - ASTPtr additional_filter_ast = nullptr, - LoggerPtr log = nullptr, - const DistributedSettings * distributed_settings = nullptr); +ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table); using AdditionalShardFilterGenerator = std::function; /// Execute a distributed query, creating a query plan, from which the query pipeline can be built. @@ -69,7 +63,8 @@ void executeQuery( const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, const DistributedSettings & distributed_settings, - AdditionalShardFilterGenerator shard_filter_generator); + AdditionalShardFilterGenerator shard_filter_generator, + bool is_remote_function); /// move to cpp this one void executeQueryWithParallelReplicas( diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 395218f834f..d80d5cd5b93 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -47,7 +47,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index a636e59fa1a..96405f35f3f 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/ExternalLoaderXMLConfigRepository.cpp b/src/Interpreters/ExternalLoaderXMLConfigRepository.cpp index a15f918f457..e404797501d 100644 --- a/src/Interpreters/ExternalLoaderXMLConfigRepository.cpp +++ b/src/Interpreters/ExternalLoaderXMLConfigRepository.cpp @@ -2,7 +2,7 @@ #include -#include +#include #include #include #include diff --git a/src/Interpreters/ITokenExtractor.cpp b/src/Interpreters/ITokenExtractor.cpp index 9c4027dfa0a..1c5d0d4b6d4 100644 --- a/src/Interpreters/ITokenExtractor.cpp +++ b/src/Interpreters/ITokenExtractor.cpp @@ -2,7 +2,7 @@ #include -#include +#include #include #include diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 519cbde588f..4fdd804452d 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -6,7 +6,7 @@ #include #include "Common/Exception.h" -#include +#include #include #include #include diff --git a/src/Interpreters/InterserverCredentials.cpp b/src/Interpreters/InterserverCredentials.cpp index c344732a262..1327a2ef388 100644 --- a/src/Interpreters/InterserverCredentials.cpp +++ b/src/Interpreters/InterserverCredentials.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 5cda4c982b4..6a3a181ed26 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index f47635a3c3f..a8639906aad 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff 
--git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 1ee8ca14b2f..6191eb73fd4 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 03df7283992..c21c4d34fa8 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include diff --git a/src/Interpreters/misc.h b/src/Interpreters/misc.h index c009808de3f..b77fc5aee1e 100644 --- a/src/Interpreters/misc.h +++ b/src/Interpreters/misc.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include namespace DB diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 3bc1b3a981f..d5653da7b3a 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -7,7 +7,7 @@ add_headers_and_sources(clickhouse_parsers ./Kusto) add_headers_and_sources(clickhouse_parsers ./PRQL) add_headers_and_sources(clickhouse_parsers ./Kusto/KustoFunctions) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) -target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) +target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access) if (TARGET ch_rust::prql) target_link_libraries(clickhouse_parsers PRIVATE ch_rust::prql) endif () diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 59b586d46a0..416f696323c 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include "Parsers/CommonParsers.h" diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index ee9e199b9b8..7cdfaf988a3 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index 16436d38d32..0eb83b8b5ac 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp index c4f84d576cb..19625f6624d 100644 --- a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp +++ b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index fbf2110e664..e508b69bdff 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -62,49 +63,51 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery kql_p; - ASTPtr select; - ParserToken s_lparen(TokenType::OpeningRoundBracket); + ParserToken 
lparen(TokenType::OpeningRoundBracket); - auto begin = pos; - auto paren_count = 0; + ASTPtr string_literal; + ParserStringLiteral parser_string_literal; + + if (!lparen.ignore(pos, expected)) + return false; + + size_t paren_count = 0; String kql_statement; - - if (s_lparen.ignore(pos, expected)) + if (parser_string_literal.parse(pos, string_literal, expected)) { - if (pos->type == TokenType::HereDoc) - { - kql_statement = String(pos->begin + 2, pos->end - 2); - } - else - { - ++paren_count; - auto pos_start = pos; - while (isValidKQLPos(pos)) - { - if (pos->type == TokenType::ClosingRoundBracket) - --paren_count; - if (pos->type == TokenType::OpeningRoundBracket) - ++paren_count; - - if (paren_count == 0) - break; - ++pos; - } - kql_statement = String(pos_start->begin, (--pos)->end); - } - ++pos; - Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); - IParser::Pos pos_kql(token_kql, pos.max_depth, pos.max_backtracks); - - if (kql_p.parse(pos_kql, select, expected)) - { - node = select; - ++pos; - return true; - } + kql_statement = typeid_cast(*string_literal).value.safeGet(); } - pos = begin; - return false; + else + { + ++paren_count; + auto pos_start = pos; + while (isValidKQLPos(pos)) + { + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + + if (paren_count == 0) + break; + ++pos; + } + if (!isValidKQLPos(pos)) + { + return false; + } + --pos; + kql_statement = String(pos_start->begin, pos->end); + ++pos; + } + + Tokens token_kql(kql_statement.data(), kql_statement.data() + kql_statement.size()); + IParser::Pos pos_kql(token_kql, pos.max_depth, pos.max_backtracks); + Expected kql_expected; + kql_expected.enable_highlighting = false; + if (!ParserKQLWithUnionQuery().parse(pos_kql, node, kql_expected)) + return false; + ++pos; + return true; } } diff --git a/src/Parsers/Kusto/parseKQLQuery.cpp b/src/Parsers/Kusto/parseKQLQuery.cpp index 34a009873f8..34076168480 100644 --- a/src/Parsers/Kusto/parseKQLQuery.cpp +++ b/src/Parsers/Kusto/parseKQLQuery.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 9ac6e623803..34855a7ce20 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index fdd712f2e68..05c9a2cd306 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Parsers/formatSettingName.cpp b/src/Parsers/formatSettingName.cpp index efbfffddd7b..59973379167 100644 --- a/src/Parsers/formatSettingName.cpp +++ b/src/Parsers/formatSettingName.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp index 2ed551851e8..074b6797517 100644 --- a/src/Parsers/obfuscateQueries.cpp +++ b/src/Parsers/obfuscateQueries.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 66bd76687aa..41c51267496 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git 
a/src/Parsers/queryNormalization.cpp b/src/Parsers/queryNormalization.cpp index 4a9dd8ceb98..4890ad6952d 100644 --- a/src/Parsers/queryNormalization.cpp +++ b/src/Parsers/queryNormalization.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 79b5dae2d6e..df27520856e 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index e523a2c243c..6f0fa55c349 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1534,25 +1534,7 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) { if (!indexes) { - /// Analyzer generates unique ColumnIdentifiers like __table1.__partition_id in filter nodes, - /// while key analysis still requires unqualified column names. - std::unordered_map node_name_to_input_node_column; - if (query_info.planner_context) - { - const auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(query_info.table_expression); - const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); - for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) - { - /// ALIAS columns cannot be used in the filter expression without being calculated in ActionsDAG, - /// so they should not be added to the input nodes. - if (alias_column_expressions.contains(column_name)) - continue; - const auto & column = table_expression_data.getColumnOrThrow(column_name); - node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name)); - } - } - - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes, node_name_to_input_node_column); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes, query_info.buildNodeNameToInputNodeColumn()); /// NOTE: Currently we store two DAGs for analysis: /// (1) SourceStepWithFilter::filter_nodes, (2) query_info.filter_actions_dag. Make sure they are consistent.
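The hunk above is the template for a long series of mechanical changes below: each ReadFrom*::applyFilters override that used to call ActionsDAG::buildFilterActionsDAG itself now delegates to SourceStepWithFilter::applyFilters, which builds the DAG with query_info.buildNodeNameToInputNodeColumn() so that analyzer identifiers are remapped to plain column names in one place. A minimal sketch of the resulting pattern (illustrative only; ReadFromSomething and createIterator are placeholder names, not code from this patch):

void ReadFromSomething::applyFilters(ActionDAGNodes added_filter_nodes)
{
    /// The base class builds filter_actions_dag and remaps analyzer
    /// ColumnIdentifiers (e.g. __table1.__partition_id) to unqualified column names.
    SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));

    /// Each step then extracts the predicate from the DAG's first output, as before.
    const ActionsDAG::Node * predicate = nullptr;
    if (filter_actions_dag)
        predicate = filter_actions_dag->getOutputs().at(0);

    createIterator(predicate); /// placeholder for the step-specific consumer
}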
diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index b845101125b..92c936cdc20 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -1,6 +1,5 @@ #include #include -#include #include namespace DB diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.cpp b/src/Processors/QueryPlan/SourceStepWithFilter.cpp index ce5a59a92f9..ad0940b90b9 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.cpp +++ b/src/Processors/QueryPlan/SourceStepWithFilter.cpp @@ -80,7 +80,7 @@ Block SourceStepWithFilter::applyPrewhereActions(Block block, const PrewhereInfo void SourceStepWithFilter::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes, query_info.buildNodeNameToInputNodeColumn()); } void SourceStepWithFilter::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value) diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index ce80d0c22c6..a677c537622 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index ac18c36e6c9..b4c32366463 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h index 15e64cf7f48..de1920bd535 100644 --- a/src/Server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/CompressionCodecSelector.h b/src/Storages/CompressionCodecSelector.h index ad6e943e821..e03d06bacdb 100644 --- a/src/Storages/CompressionCodecSelector.h +++ b/src/Storages/CompressionCodecSelector.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include #include #include #include diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 14866c25365..d471c67553d 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 0f3b03f0955..33bde34b4f9 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -994,7 +994,8 @@ private: void ReadFromHDFS::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index b532abc9074..920155bf689 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp 
index ab45ce877c2..9c5b29ae265 100644 --- a/src/Storages/IStorageCluster.cpp +++ b/src/Storages/IStorageCluster.cpp @@ -86,7 +86,8 @@ private: void ReadFromCluster::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 463ca07ec57..e0437f4f715 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include @@ -793,7 +793,8 @@ void IMergeTreeDataPart::addProjectionPart( projection_parts[projection_name] = std::move(projection_part); } -void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded) +void IMergeTreeDataPart::loadProjections( + bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded, bool only_metadata) { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); for (const auto & projection : metadata_snapshot->projections) @@ -813,7 +814,10 @@ void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool ch try { - part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); + if (only_metadata) + part->loadChecksums(require_columns_checksums); + else + part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); } catch (...) { diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index c380f99060e..9ee01c0efc4 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -445,7 +445,15 @@ public: bool hasBrokenProjection(const String & projection_name) const; /// Return true, if all projections were loaded successfully and none was marked as broken. 
- void loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded = false); + void loadProjections( + bool require_columns_checksums, + bool check_consistency, + bool & has_broken_projection, + bool if_not_loaded = false, + bool only_metadata = false); + + /// If checksums.txt exists, reads file's checksums (and sizes) from it + void loadChecksums(bool require); void setBrokenReason(const String & message, int code) const; @@ -671,9 +679,6 @@ private: static void appendFilesOfColumns(Strings & files); - /// If checksums.txt exists, reads file's checksums (and sizes) from it - void loadChecksums(bool require); - static void appendFilesOfChecksums(Strings & files); /// Loads marks index granularity into memory diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e87fe53a333..6f89bb62d62 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp index 24d907dbad6..9aadc3c3ca7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index d6c36d12bf5..9a368bd44f5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 01417b8977b..1a9aa6d0f41 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -578,7 +578,8 @@ void ReadFromEmbeddedRocksDB::initializePipeline(QueryPipelineBuilder & pipeline void ReadFromEmbeddedRocksDB::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const auto & sample_block = getOutputStream().header; auto primary_key_data_type = sample_block.getByName(storage.primary_key).type; std::tie(keys, all_scan) = getFilterKeys(storage.primary_key, primary_key_data_type, filter_actions_dag, context); diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.cpp b/src/Storages/RocksDB/StorageSystemRocksDB.cpp index 4406a7c3fd4..5105b190fd9 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.cpp +++ b/src/Storages/RocksDB/StorageSystemRocksDB.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index c3a772e532c..16e42e32b8a 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -287,7 +287,8 @@ void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate) void ReadFromS3Queue::applyFilters(ActionDAGNodes added_filter_nodes) { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + 
SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/SelectQueryInfo.cpp b/src/Storages/SelectQueryInfo.cpp index 665da7fee70..d59ccf0dfaf 100644 --- a/src/Storages/SelectQueryInfo.cpp +++ b/src/Storages/SelectQueryInfo.cpp @@ -13,4 +13,24 @@ bool SelectQueryInfo::isFinal() const return select.final(); } +std::unordered_map SelectQueryInfo::buildNodeNameToInputNodeColumn() const +{ + std::unordered_map node_name_to_input_node_column; + if (planner_context) + { + const auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); + const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); + for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) + { + /// ALIAS columns cannot be used in the filter expression without being calculated in ActionsDAG, + /// so they should not be added to the input nodes. + if (alias_column_expressions.contains(column_name)) + continue; + const auto & column = table_expression_data.getColumnOrThrow(column_name); + node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name)); + } + } + return node_name_to_input_node_column; +} + } diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 655676812d9..11e2a2fc5e7 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -239,5 +239,11 @@ struct SelectQueryInfo bool merge_tree_enable_remove_parts_from_snapshot_optimization = true; bool isFinal() const; + + /// Analyzer generates unique ColumnIdentifiers like __table1.__partition_id in filter nodes, + /// while key analysis still requires unqualified column names. + /// This function generates a map that maps the unique names to table column names, + /// for the current table (`table_expression`). + std::unordered_map buildNodeNameToInputNodeColumn() const; }; } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index e1c6ec0097c..e98eaf1e8f2 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -799,7 +799,8 @@ private: void ReadFromAzureBlob::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 7b5916c0273..9882d744c29 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -926,7 +926,8 @@ void StorageDistributed::read( sharding_key_expr, sharding_key_column_name, distributed_settings, - additional_shard_filter_generator); + additional_shard_filter_generator, + /* is_remote_function= */ static_cast(owned_cluster)); /// This is a bug, it is possible only when there are no shards to query, and this is handled earlier.
if (!query_plan.isInitialized()) diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp index 307a0aa001a..9d12a1569d8 100644 --- a/src/Storages/StorageFactory.cpp +++ b/src/Storages/StorageFactory.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7db8fc2500a..51bcc64bceb 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1534,7 +1534,8 @@ private: void ReadFromFile::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 1ac739f03fd..306ae782d24 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 25c48de94e1..54b2d5ef6fb 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 7afa480149f..4c678a1228b 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1622,7 +1622,7 @@ void ReadFromMerge::applyFilters(const QueryPlan & plan, const ActionDAGNodes & void ReadFromMerge::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(added_filter_nodes); filterTablesAndCreateChildrenPlans(); diff --git a/src/Storages/StorageMergeTreeIndex.cpp b/src/Storages/StorageMergeTreeIndex.cpp index 4747232d7f7..0b1ad02f8c9 100644 --- a/src/Storages/StorageMergeTreeIndex.cpp +++ b/src/Storages/StorageMergeTreeIndex.cpp @@ -280,7 +280,8 @@ private: void ReadFromMergeTreeIndex::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8a99a824540..199ba731f7b 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 9768653f3fe..2ce188c203c 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1454,7 +1454,8 @@ void StorageS3::read( void ReadFromStorageS3Step::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 54218351cf1..205a90423bf 100644 --- 
a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 8a71a771367..272f771194d 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -1038,7 +1038,8 @@ private: void ReadFromURL::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index c3a2e726365..899c3d5cf40 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -47,7 +47,6 @@ add_library(clickhouse_storages_system ${storages_system_sources}) target_link_libraries(clickhouse_storages_system PRIVATE dbms common - string_utils clickhouse_common_zookeeper clickhouse_parsers Poco::JSON diff --git a/src/Storages/System/IStorageSystemOneBlock.cpp b/src/Storages/System/IStorageSystemOneBlock.cpp index 53399654c8d..456b7c4f90b 100644 --- a/src/Storages/System/IStorageSystemOneBlock.cpp +++ b/src/Storages/System/IStorageSystemOneBlock.cpp @@ -91,7 +91,8 @@ void ReadFromSystemOneBlock::initializePipeline(QueryPipelineBuilder & pipeline, void ReadFromSystemOneBlock::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 74b44cc0a2d..49da1eba9ec 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -342,7 +342,8 @@ private: void ReadFromSystemColumns::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/System/StorageSystemDashboards.cpp b/src/Storages/System/StorageSystemDashboards.cpp index 23d8fcfc481..9682fbc74a1 100644 --- a/src/Storages/System/StorageSystemDashboards.cpp +++ b/src/Storages/System/StorageSystemDashboards.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 2afc03d0e5e..093adc59cc6 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -219,7 +219,8 @@ private: void ReadFromSystemDataSkippingIndices::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 31d566ef8b6..f48a8c67971 100644 --- 
a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -313,7 +313,8 @@ protected: void ReadFromSystemDetachedParts::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) { const auto * predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index b1ea2dd3f2b..175c0834bcb 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -263,7 +263,8 @@ ReadFromSystemPartsBase::ReadFromSystemPartsBase( void ReadFromSystemPartsBase::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) { const auto * predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 10d5c353c43..3bd5fd290db 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -290,7 +290,8 @@ private: void ReadFromSystemReplicas::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index d428d6bd6d0..1f900ec623e 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -750,7 +750,8 @@ void StorageSystemTables::read( void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 7afa1894a64..cb46cd19517 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -474,7 +474,8 @@ static Paths extractPath(const ActionsDAG::NodeRawConstPtrs & filter_nodes, Cont void ReadFromSystemZooKeeper::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(added_filter_nodes); + paths = extractPath(added_filter_nodes.nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); } diff --git a/src/Storages/examples/CMakeLists.txt b/src/Storages/examples/CMakeLists.txt index cddfc9404d4..b4786b7313b 100644 --- a/src/Storages/examples/CMakeLists.txt +++ b/src/Storages/examples/CMakeLists.txt @@ -5,4 +5,4 @@ clickhouse_add_executable (merge_selector2 merge_selector2.cpp) target_link_libraries (merge_selector2 PRIVATE dbms) clickhouse_add_executable 
(get_current_inserts_in_replicated get_current_inserts_in_replicated.cpp) -target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper string_utils) +target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 08048564383..3a616c8aad6 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -17,7 +17,7 @@ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union import docker_images_helper import upload_result_helper from build_check import get_release_or_pr -from ci_config import CI_CONFIG, Build, CILabels, CIStages, JobNames, StatusNames +from ci_config import CI_CONFIG, Build, CILabels, CIStages, JobNames from ci_utils import GHActions, is_hex, normalize_string from clickhouse_helper import ( CiLogsCredentials, @@ -34,16 +34,12 @@ from commit_status_helper import ( get_commit, post_commit_status, set_status_comment, - update_mergeable_check, - update_upstream_sync_status, ) from digest_helper import DockerDigester, JobDigester from env_helper import ( CI, GITHUB_JOB_API_URL, - GITHUB_REPOSITORY, GITHUB_RUN_URL, - GITHUB_UPSTREAM_REPOSITORY, REPO_COPY, REPORT_PATH, S3_BUILDS_BUCKET, @@ -56,7 +52,6 @@ from github_helper import GitHub from pr_info import PRInfo from report import ERROR, SUCCESS, BuildResult, JobReport from s3_helper import S3Helper -from synchronizer_utils import SYNC_BRANCH_PREFIX from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -891,9 +886,9 @@ class CiOptions: for job in job_with_parents: if job in jobs_to_do and job not in jobs_to_do_requested: jobs_to_do_requested.append(job) - assert ( - jobs_to_do_requested - ), f"Include tags are set but no job configured - Invalid tags, probably [{self.include_keywords}]" + print( + f"WARNING: Include tags are set but no job configured - Invalid tags, probably [{self.include_keywords}]" + ) if JobNames.STYLE_CHECK not in jobs_to_do_requested: # Style check must not be omitted jobs_to_do_requested.append(JobNames.STYLE_CHECK) @@ -903,7 +898,7 @@ class CiOptions: if self.ci_sets: for tag in self.ci_sets: label_config = CI_CONFIG.get_label_config(tag) - assert label_config, f"Unknonwn tag [{tag}]" + assert label_config, f"Unknown tag [{tag}]" print( f"NOTE: CI Set's tag: [{tag}], add jobs: [{label_config.run_jobs}]" ) @@ -2189,39 +2184,6 @@ def main() -> int: pr_info, dump_to_file=True, ) - if not pr_info.is_merge_queue: - # in the merge queue mergeable status must be set only in FinishCheck (last job in wf) - mergeable_status = update_mergeable_check( - commit, - pr_info, - job_report.check_name or _get_ext_check_name(args.job_name), - ) - - # Process upstream StatusNames.SYNC - if ( - pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") - and mergeable_status - and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY - ): - upstream_pr_number = int( - pr_info.head_ref.split("/pr/", maxsplit=1)[1] - ) - update_upstream_sync_status( - upstream_pr_number, pr_info.number, gh, mergeable_status - ) - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - [], - job_report.status, - 0, - job_report.start_time, - f"https://github.com/ClickHouse/ClickHouse/pull/{upstream_pr_number}", - StatusNames.SYNC, - ) - prepared_events[0]["test_context_raw"] = args.job_name - ch_helper.insert_events_into( - db="default", table="checks", events=prepared_events - ) print(f"Job report url: [{check_url}]") prepared_events 
= prepare_tests_results_for_clickhouse( diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index dcd2a5a4228..c3421998ca9 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -26,6 +26,7 @@ class CIStages(metaclass=WithIter): BUILDS_2 = "Builds_2" TESTS_1 = "Tests_1" TESTS_2 = "Tests_2" + TESTS_3 = "Tests_3" class Runners(metaclass=WithIter): @@ -581,7 +582,6 @@ class CIConfig: elif job_name == JobNames.BUILD_CHECK_SPECIAL: stage_type = CIStages.TESTS_2 elif self.is_test_job(job_name): - stage_type = CIStages.TESTS_1 if job_name in CI_CONFIG.test_configs: required_build = CI_CONFIG.test_configs[job_name].required_build assert required_build @@ -593,6 +593,8 @@ class CIConfig: stage_type = CIStages.TESTS_2 else: stage_type = CIStages.TESTS_1 + if job_name not in REQUIRED_CHECKS: + stage_type = CIStages.TESTS_3 assert stage_type, f"BUG [{job_name}]" return stage_type diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index fc939a08e11..e1c47353743 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -447,9 +447,7 @@ def set_mergeable_check( ) -def update_mergeable_check( - commit: Commit, pr_info: PRInfo, check_name: str -) -> Optional[CommitStatus]: +def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> None: "check if the check_name in REQUIRED_CHECKS and then trigger update" not_run = ( pr_info.labels.intersection({Labels.SKIP_MERGEABLE_CHECK, Labels.RELEASE}) @@ -460,17 +458,21 @@ def update_mergeable_check( if not_run: # Let's avoid unnecessary work - return None + return logging.info("Update Mergeable Check by %s", check_name) statuses = get_commit_filtered_statuses(commit) - return trigger_mergeable_check(commit, statuses) + trigger_mergeable_check(commit, statuses) def trigger_mergeable_check( - commit: Commit, statuses: CommitStatuses, hide_url: bool = False -) -> CommitStatus: + commit: Commit, + statuses: CommitStatuses, + hide_url: bool = False, + set_if_green: bool = False, + workflow_failed: bool = False, +) -> StatusType: """calculate and update StatusNames.MERGEABLE""" required_checks = [status for status in statuses if is_required(status.context)] @@ -498,19 +500,27 @@ def trigger_mergeable_check( if fail: description = "failed: " + ", ".join(fail) state = FAILURE + elif workflow_failed: + description = "check workflow failures" + state = FAILURE description = format_description(description) - if mergeable_status is None or mergeable_status.description != description: - return set_mergeable_check(commit, description, state, hide_url) + if not set_if_green and state == SUCCESS: + # do not set green Mergeable Check status + pass + else: + if mergeable_status is None or mergeable_status.description != description: + set_mergeable_check(commit, description, state, hide_url) - return mergeable_status + return state def update_upstream_sync_status( upstream_pr_number: int, sync_pr_number: int, gh: Github, - mergeable_status: CommitStatus, + state: StatusType, + can_set_green_mergeable_status: bool = False, ) -> None: upstream_repo = gh.get_repo(GITHUB_UPSTREAM_REPOSITORY) upstream_pr = upstream_repo.get_pull(upstream_pr_number) @@ -518,46 +528,41 @@ def update_upstream_sync_status( sync_pr = sync_repo.get_pull(sync_pr_number) # Find the commit that is in both repos, upstream and cloud sync_commits = sync_pr.get_commits().reversed - upstream_commits = upstream_pr.get_commits() + upstream_commits = upstream_pr.get_commits().reversed # Github objects are compared by _url 
attribute. We can't compare them directly and # should compare commits by SHA1 - upstream_shas = [uc.sha for uc in upstream_commits] + upstream_shas = [c.sha for c in upstream_commits] logging.info("Commits in upstream PR:\n %s", ", ".join(upstream_shas)) - sync_shas = [uc.sha for uc in upstream_commits] + sync_shas = [c.sha for c in sync_commits] logging.info("Commits in sync PR:\n %s", ", ".join(reversed(sync_shas))) - found = False - for commit in sync_commits: - try: - idx = upstream_shas.index(commit.sha) - found = True - upstream_commit = upstream_commits[idx] + + # find latest synced commit + last_synced_upstream_commit = None + for commit in upstream_commits: + if commit.sha in sync_shas: + last_synced_upstream_commit = commit break - except ValueError: - continue - if not found: - logging.info( - "There's no same commits in upstream and sync PRs, probably force-push" - ) - return + assert last_synced_upstream_commit - sync_status = get_status(mergeable_status.state) + sync_status = get_status(state) logging.info( "Using commit %s to post the %s status `%s`: [%s]", - upstream_commit.sha, + last_synced_upstream_commit.sha, sync_status, StatusNames.SYNC, - mergeable_status.description, + "", ) post_commit_status( - upstream_commit, + last_synced_upstream_commit, sync_status, "", # let's not expose any urls from cloud - mergeable_status.description, + "", StatusNames.SYNC, ) trigger_mergeable_check( - upstream_commit, - get_commit_filtered_statuses(upstream_commit), + last_synced_upstream_commit, + get_commit_filtered_statuses(last_synced_upstream_commit), True, + set_if_green=can_set_green_mergeable_status, ) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index a66ebbeadf4..1a7000f5353 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -11,10 +11,13 @@ from commit_status_helper import ( post_commit_status, set_mergeable_check, trigger_mergeable_check, + update_upstream_sync_status, ) from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import PENDING, SUCCESS +from synchronizer_utils import SYNC_BRANCH_PREFIX +from env_helper import GITHUB_REPOSITORY, GITHUB_UPSTREAM_REPOSITORY def main(): @@ -40,7 +43,21 @@ def main(): set_mergeable_check(commit, "workflow passed", "success") else: statuses = get_commit_filtered_statuses(commit) - trigger_mergeable_check(commit, statuses) + state = trigger_mergeable_check(commit, statuses, set_if_green=True) + + # Process upstream StatusNames.SYNC + if ( + pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") + and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY + ): + upstream_pr_number = int(pr_info.head_ref.split("/pr/", maxsplit=1)[1]) + update_upstream_sync_status( + upstream_pr_number, + pr_info.number, + gh, + state, + can_set_green_mergeable_status=True, + ) statuses = [s for s in statuses if s.context == StatusNames.CI] if not statuses: diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 450ece62d4b..500de4eb718 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -13,7 +13,11 @@ from github.PaginatedList import PaginatedList from github.PullRequestReview import PullRequestReview from github.WorkflowRun import WorkflowRun -from commit_status_helper import get_commit_filtered_statuses +from commit_status_helper import ( + get_commit_filtered_statuses, + get_commit, + trigger_mergeable_check, +) from get_robot_token import get_best_robot_token from github_helper import GitHub, NamedUser, PullRequest, Repository from pr_info import PRInfo @@
-173,6 +177,17 @@ def parse_args() -> argparse.Namespace: action="store_true", help="if set, the script won't merge the PR, just check the conditions", ) + parser.add_argument( + "--set-ci-status", + action="store_true", + help="if set, only update/set Mergeable Check status", + ) + parser.add_argument( + "--wf-status", + type=str, + default="", + help="overall workflow status [success|failure]. Used with --set-ci-status only", + ) parser.add_argument( "--check-approved", action="store_true", @@ -226,6 +241,21 @@ def main(): token = args.token or get_best_robot_token() gh = GitHub(token) repo = gh.get_repo(args.repo) + + if args.set_ci_status: + assert args.wf_status in ("failure", "success") + # set mergeable check status and exit + commit = get_commit(gh, args.pr_info.sha) + statuses = get_commit_filtered_statuses(commit) + trigger_mergeable_check( + commit, + statuses, + hide_url=False, + set_if_green=True, + workflow_failed=(args.wf_status != "success"), + ) + return + # An ugly and not nice fix to patch the wrong organization URL, # see https://github.com/PyGithub/PyGithub/issues/2395#issuecomment-1378629710 # pylint: disable=protected-access
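Taken together with the commit_status_helper.py hunks above, `--set-ci-status` recomputes the Mergeable Check from the required commit statuses plus the overall workflow result. A minimal sketch of that state derivation (a simplification, assuming the success-branch wording, which the hunks above do not show):

```python
from typing import List, Tuple

FAILURE, SUCCESS = "failure", "success"  # stand-ins for report.StatusType values


def mergeable_state(
    required: List[Tuple[str, str]], workflow_failed: bool
) -> Tuple[str, str]:
    """Condensed model of trigger_mergeable_check(): derive the Mergeable
    Check state from (context, state) pairs of the required statuses."""
    fail = [context for context, state in required if state == FAILURE]
    if fail:
        return FAILURE, "failed: " + ", ".join(fail)
    if workflow_failed:
        return FAILURE, "check workflow failures"
    return SUCCESS, "success"  # assumed description; not shown in the diff


# e.g. mergeable_state([("Stateless tests", "success")], workflow_failed=True)
# yields ("failure", "check workflow failures"); with set_if_green=True the
# helper would also post a green status when nothing failed.
```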
diff --git a/tests/integration/test_distributed_inter_server_secret/configs/users.d/new_user.xml b/tests/integration/test_distributed_inter_server_secret/configs/users.d/new_user.xml new file mode 100644 index 00000000000..a747d61a0dd --- /dev/null +++ b/tests/integration/test_distributed_inter_server_secret/configs/users.d/new_user.xml @@ -0,0 +1,12 @@ +<clickhouse> + <users> + <new_user> + <password></password> + <networks> + <ip>::/0</ip> + </networks> + <profile>default</profile> + <quota>default</quota> + </new_user> + </users> +</clickhouse> diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 10dbb23d961..50d7be4d11e 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -12,12 +12,16 @@ from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION cluster = ClickHouseCluster(__file__) -def make_instance(name, cfg, *args, **kwargs): +def make_instance(name, *args, **kwargs): + main_configs = kwargs.pop("main_configs", []) + main_configs.append("configs/remote_servers.xml") + user_configs = kwargs.pop("user_configs", []) + user_configs.append("configs/users.xml") return cluster.add_instance( name, with_zookeeper=True, - main_configs=["configs/remote_servers.xml", cfg], - user_configs=["configs/users.xml"], + main_configs=main_configs, + user_configs=user_configs, *args, **kwargs, ) @@ -27,11 +31,16 @@ def make_instance(name, cfg, *args, **kwargs): assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" # _n1/_n2 contains cluster with different <secret> -- should fail -n1 = make_instance("n1", "configs/remote_servers_n1.xml") -n2 = make_instance("n2", "configs/remote_servers_n2.xml") +# only n1 contains new_user +n1 = make_instance( + "n1", + main_configs=["configs/remote_servers_n1.xml"], + user_configs=["configs/users.d/new_user.xml"], +) +n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) backward = make_instance( "backward", - "configs/remote_servers_backward.xml", + main_configs=["configs/remote_servers_backward.xml"], image="clickhouse/clickhouse-server", # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, @@ -100,6 +109,12 @@ def bootstrap(): ) """ ) + n.query( + """ + CREATE TABLE dist_over_dist_secure AS data + Engine=Distributed(secure, currentDatabase(), dist_secure, key) + """ + ) @pytest.fixture(scope="module", autouse=True) @@ -432,3 +447,20 @@ def test_user_secure_cluster_from_backward(user, password): assert n1.contains_in_log( "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." ) + + +def test_secure_cluster_distributed_over_distributed_different_users(): + # This works because we will have initial_user='default' + n1.query( + "SELECT * FROM remote('n1', currentDatabase(), dist_secure)", user="new_user" + ) + # While this is broken because now initial_user='new_user', and n2 does not have it + with pytest.raises(QueryRuntimeException): + n2.query( + "SELECT * FROM remote('n1', currentDatabase(), dist_secure, 'new_user')" + ) + # And this is still a problem; let's assume that this is OK, since we + # expect that in case of dist-over-dist the clusters are the same (users + # and so on). + with pytest.raises(QueryRuntimeException): + n1.query("SELECT * FROM dist_over_dist_secure", user="new_user")
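For reference, the reworked `make_instance()` above takes per-instance configs as keyword arguments and appends the shared defaults itself, so a hypothetical extra instance (there is no `n3` in this test) would be declared like this:

```python
# Hypothetical usage sketch of the keyword-based make_instance() API; the
# helper itself appends the shared configs/remote_servers.xml and
# configs/users.xml to whatever lists are passed in.
n3 = make_instance(
    "n3",
    main_configs=["configs/remote_servers_n1.xml"],  # reusing n1's cluster layout
    user_configs=["configs/users.d/new_user.xml"],
)
```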
diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference index ae4fafae829..b06fee4af06 100644 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference @@ -1,2 +1,2 @@ -data_02340 1_2_2_0 6 -data_02340_rep 1_0_0_0 6 +data_02340 1_2_2_0 1 +data_02340_rep 1_0_0_0 1 diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index 208a9038681..caa600298ce 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -9,40 +9,58 @@ function check_refcnt_for_table() { local table=$1 && shift - $CLICKHOUSE_CLIENT -q "system stop merges $table" + $CLICKHOUSE_CLIENT -nm -q " + system stop merges $table; + -- cleanup thread may hold the parts lock + system stop cleanup $table; + -- queue may hold the parts lock for a while as well + system stop pulling replication log $table; + " $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into $table select number, number%4 from numbers(200)" local query_id query_id="$table-$(random_str 10)" - SETTINGS="--format Null --max_threads 1 --max_block_size 1 --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.0" + local log_file + log_file=$(mktemp "$CUR_DIR/clickhouse-tests.XXXXXX.log") + local args=( + --format Null + --max_threads 1 + --max_block_size 1 + --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.0 + --query_id "$query_id" + --send_logs_level "test" + --server_logs_file "$log_file" + ) # Notes: - # - query may sleep 1*(200/4)=50 seconds maximum, it is enough to check system.parts + # - query may sleep 0.1*(200/4)=5 seconds maximum, which is enough to check system.parts # - "part = 1" condition should prune all parts except first # - max_block_size=1 with index_granularity=1 will allow to cancel the query earlier - $CLICKHOUSE_CLIENT $SETTINGS --query_id "$query_id" -q "select sleepEachRow(1) from $table where part = 1" & + $CLICKHOUSE_CLIENT "${args[@]}" -q "select sleepEachRow(0.1) from $table where part = 1" & PID=$! - # wait for query to be started - while [ "$($CLICKHOUSE_CLIENT -q "select count() from system.processes where query_id = '$query_id'")" -ne 1 ]; do - sleep 0.1 - done - # When the query only starts it execution it holds reference for each part, # however when it starts reading, partition pruning takes place, # and it should hold only parts that are required for SELECT # - # But to reach partition prune the function sleepEachRow() will be executed twice, - # so 2 seconds for sleepEachRow() and 3 seconds just to ensure that it enters the reading stage. - sleep $((2+3)) + # So let's wait until the reading starts. + while ! grep -F -q -e "Exception" -e "MergeTreeRangeReader" "$log_file"; do + sleep 0.1 + done - # NOTE: parts that are used in query will have refcount increased for each range - $CLICKHOUSE_CLIENT -q "select table, name, refcount from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount > 1" + # NOTE: parts that are used in the query will be held in multiple places, and + # this is where the magic 6 comes from. Also there could be some other + # background threads (e.g. asynchronous metrics) that use the part, so we + # simply filter parts not by "refcount > 1" but with some delta - "3", to + # avoid flakiness. + $CLICKHOUSE_CLIENT -q "select table, name, refcount>=6 from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount >= 3" # Kill the query gracefully. kill -INT $PID wait $PID + grep -F Exception "$log_file" | grep -v -F QUERY_WAS_CANCELLED + rm -f "${log_file:?}" } # NOTE: index_granularity=1 to cancel ASAP @@ -52,11 +70,13 @@ $CLICKHOUSE_CLIENT -nmq " create table data_02340 (key Int, part Int) engine=MergeTree() partition by part order by key settings index_granularity=1; " || exit 1 check_refcnt_for_table data_02340 +$CLICKHOUSE_CLIENT -q "drop table data_02340 sync" $CLICKHOUSE_CLIENT -nmq " drop table if exists data_02340_rep sync; create table data_02340_rep (key Int, part Int) engine=ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') partition by part order by key settings index_granularity=1; " || exit 1 check_refcnt_for_table data_02340_rep +$CLICKHOUSE_CLIENT -q "drop table data_02340_rep sync" exit 0 diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql b/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql index 8521ada04d5..950485d53f0 100644 --- a/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql @@ -8,10 +8,6 @@ set optimize_or_like_chain = 0; set max_block_size = 100000; set max_insert_threads = 1; --- Analyzer breaks the queries with IN and some queries with BETWEEN. -- TODO: Figure out why. -set allow_experimental_analyzer=0; - -- Try all the types. insert into function file('02841.parquet') -- Use negative numbers to test sign extension for signed types and lack of sign extension for diff --git a/tests/queries/0_stateless/02892_orc_filter_pushdown.sql b/tests/queries/0_stateless/02892_orc_filter_pushdown.sql index d319252f592..f9aa7696ac6 100644 --- a/tests/queries/0_stateless/02892_orc_filter_pushdown.sql +++ b/tests/queries/0_stateless/02892_orc_filter_pushdown.sql @@ -13,9 +13,6 @@ set max_insert_threads = 1; SET session_timezone = 'UTC'; --- Analyzer breaks the queries with IN and some queries with BETWEEN. -set allow_experimental_analyzer=0; - -- Try all the types.
insert into function file('02892.orc') diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 2a9aa259fdd..60384125ec5 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -53,6 +53,8 @@ AutoFDO AutoML Autocompletion AvroConfluent +analysisOfVariance +ANOVA BIGINT BIGSERIAL BORO