mirror of https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00

Merge branch 'master' into master
commit 877e27245e

.github/ISSUE_TEMPLATE/20_feature-request.md (vendored)
@@ -15,7 +15,7 @@ assignees: ''

**Use case**

-> A clear and concise description of what is the intended usage scenario is.
+> A clear and concise description of what the intended usage scenario is.

**Describe the solution you'd like**
@@ -11,6 +11,38 @@ option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries n

if (ARCH_NATIVE)
    set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native")

    # Populate the ENABLE_ option flags. This is required for the build of some third-party dependencies, specifically snappy, which
    # (somewhat weirdly) expects the relative SNAPPY_HAVE_ preprocessor variables to be populated, in addition to the microarchitecture
    # feature flags being enabled in the compiler. This fixes the ARCH_NATIVE flag by automatically populating the ENABLE_ option flags
    # according to the current CPU's capabilities, detected using clang.
    if (ARCH_AMD64)
        execute_process(
            COMMAND sh -c "clang -E - -march=native -###"
            INPUT_FILE /dev/null
            OUTPUT_QUIET
            ERROR_VARIABLE TEST_FEATURE_RESULT)

        macro(TEST_AMD64_FEATURE TEST_FEATURE_RESULT feat flag)
            if (${TEST_FEATURE_RESULT} MATCHES "\"\\+${feat}\"")
                set(${flag} ON)
            else ()
                set(${flag} OFF)
            endif ()
        endmacro()

        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} ssse3 ENABLE_SSSE3)
        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} sse4.1 ENABLE_SSE41)
        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} sse4.2 ENABLE_SSE42)
        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} vpclmulqdq ENABLE_PCLMULQDQ)
        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} popcnt ENABLE_POPCNT)
        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} avx ENABLE_AVX)
        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} avx2 ENABLE_AVX2)
        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} avx512f ENABLE_AVX512)
        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} avx512vbmi ENABLE_AVX512_VBMI)
        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} bmi ENABLE_BMI)
        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} bmi2 ENABLE_BMI2)
    endif ()

elseif (ARCH_AARCH64)
    # ARM publishes almost every year a new revision of it's ISA [1]. Each version comes with new mandatory and optional features from
    # which CPU vendors can pick and choose. This creates a lot of variability ... We provide two build "profiles", one for maximum
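For context on how the probe above works: `clang -E - -march=native -###` makes the clang driver print its internal compiler invocation to stderr, and every enabled target feature shows up there as a quoted `"+<feature>"` token, which is what the `MATCHES` regex looks for. Below is a standalone sketch of the same probe that can be run outside the ClickHouse build with `cmake -P probe.cmake`; it assumes `clang` and `sh` are on `PATH`, and its output is illustrative only.

```cmake
# probe.cmake — hypothetical standalone version of the feature probe above.
cmake_minimum_required(VERSION 3.20)

# Ask the clang driver to print its internal invocation; target features appear
# on stderr as quoted "+ssse3", "+avx2", ... tokens.
execute_process(
    COMMAND sh -c "clang -E - -march=native -###"
    INPUT_FILE /dev/null
    OUTPUT_QUIET
    ERROR_VARIABLE TEST_FEATURE_RESULT)

# Same matching logic as the TEST_AMD64_FEATURE macro, applied to a few features.
foreach (feat ssse3 sse4.2 avx2 avx512f)
    if (TEST_FEATURE_RESULT MATCHES "\"\\+${feat}\"")
        message(STATUS "${feat}: ON")
    else ()
        message(STATUS "${feat}: OFF")
    endif ()
endforeach ()
```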
@@ -1,4 +1,21 @@

set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build.
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
    # Enable libcxx debug mode: https://releases.llvm.org/15.0.0/projects/libcxx/docs/DesignDocs/DebugMode.html
    # The docs say the debug mode violates complexity guarantees, so do this only for Debug builds.
    # set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_ENABLE_DEBUG_MODE=1")
    # ^^ Crashes the database upon startup, needs investigation.
    # Besides that, the implementation looks like a poor man's MSAN specific to libcxx. Since CI tests MSAN
    # anyways, we can keep the debug mode disabled.

    # Libcxx also provides extra assertions:
    # --> https://releases.llvm.org/15.0.0/projects/libcxx/docs/UsingLibcxx.html#assertions-mode
    # These look orthogonal to the debug mode but the debug mode enables them implicitly:
    # --> https://github.com/llvm/llvm-project/blob/release/15.x/libcxx/include/__assert#L29
    # They are cheap and straightforward, so enable them in debug builds:
    set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_ENABLE_ASSERTIONS=1")

    # TODO Once we upgrade to LLVM 18+, reconsider all of the above as they introduced "hardening modes":
    # https://libcxx.llvm.org/Hardening.html
endif ()

add_subdirectory(contrib/libcxxabi-cmake)
add_subdirectory(contrib/libcxx-cmake)
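As a rough illustration of what `-D_LIBCPP_ENABLE_ASSERTIONS=1` adds in such a debug build — a sketch under the assumption of clang with libc++ 15; the exact set of checked operations varies by libc++ version:

```cpp
#include <vector>

int main()
{
    std::vector<int> v{1, 2, 3};
    // With libc++ assertions enabled, this out-of-bounds operator[] is caught by a
    // libc++ assertion and the program aborts with a diagnostic; without the macro
    // it is silent undefined behavior.
    return v[3];
}
```

Compiling with something like `clang++ -stdlib=libc++ -D_LIBCPP_ENABLE_ASSERTIONS=1 oob.cpp` should reproduce the abort.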
@@ -1,6 +1,9 @@

set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}")

# To avoid errors "'X' does not refer to a value" while using `offsetof` function.
set(CMAKE_CXX_STANDARD 17)

# This is a minimized version of the function definition in CMake/AbseilHelpers.cmake

#
@@ -5,6 +5,9 @@ if(NOT ENABLE_PROTOBUF)
    return()
endif()

# To avoid errors "'X' does not refer to a value" while using `offsetof` function.
set(CMAKE_CXX_STANDARD 17)

set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src")
if(OS_FREEBSD AND SANITIZE STREQUAL "address")
    # ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found
@@ -6,6 +6,8 @@ if(NOT ENABLE_GRPC)
    return()
endif()

set(CMAKE_CXX_STANDARD 17)

set(_gRPC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/grpc")
set(_gRPC_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/grpc")

@@ -22,7 +22,7 @@
# limitations under the License.

# We want to use C++23, but GRPC is not ready
-set (CMAKE_CXX_STANDARD 20)
+set (CMAKE_CXX_STANDARD 17)

set(_gRPC_ZLIB_INCLUDE_DIR "")
set(_gRPC_ZLIB_LIBRARIES ch_contrib::zlib)
@@ -196,7 +196,6 @@ When writing docs, you can use prepared templates. Copy the code of a template a

Templates:

- [Function](_description_templates/template-function.md)
- [Setting](_description_templates/template-setting.md)
- [Server Setting](_description_templates/template-server-setting.md)
- [Database or Table engine](_description_templates/template-engine.md)
- [System table](_description_templates/template-system-table.md)
@@ -1,27 +0,0 @@

## setting_name {#setting_name}

Description.

For the switch setting, use the typical phrase: “Enables or disables something ...”.

Possible values:

*For switcher setting:*

- 0 — Disabled.
- 1 — Enabled.

*For another setting (typical phrases):*

- Positive integer.
- 0 — Disabled or unlimited or something else.

Default value: `value`.

**Additional Info** (Optional)

The name of an additional section can be any, for example, **Usage**.

**See Also** (Optional)

- [link](#)
@@ -1,11 +0,0 @@

sudo apt-get install -y apt-transport-https ca-certificates dirmngr
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754

echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \
    /etc/apt/sources.list.d/clickhouse.list
sudo apt-get update

sudo apt-get install -y clickhouse-server clickhouse-client

sudo service clickhouse-server start
clickhouse-client # or "clickhouse-client --password" if you've set up a password.

@@ -1,6 +0,0 @@

sudo yum install -y yum-utils
sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
sudo yum install -y clickhouse-server clickhouse-client

sudo /etc/init.d/clickhouse-server start
clickhouse-client # or "clickhouse-client --password" if you set up a password.
@@ -1,32 +0,0 @@

LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \
    grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1)
export LATEST_VERSION

case $(uname -m) in
    x86_64) ARCH=amd64 ;;
    aarch64) ARCH=arm64 ;;
    *) echo "Unknown architecture $(uname -m)"; exit 1 ;;
esac

for PKG in clickhouse-common-static clickhouse-common-static-dbg clickhouse-server clickhouse-client
do
    curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION-${ARCH}.tgz" \
        || curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION.tgz"
done

tar -xzvf "clickhouse-common-static-$LATEST_VERSION-${ARCH}.tgz" \
    || tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz"
sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh"

tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION-${ARCH}.tgz" \
    || tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz"
sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh"

tar -xzvf "clickhouse-server-$LATEST_VERSION-${ARCH}.tgz" \
    || tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz"
sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" configure
sudo /etc/init.d/clickhouse-server start

tar -xzvf "clickhouse-client-$LATEST_VERSION-${ARCH}.tgz" \
    || tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz"
sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh"
@@ -63,7 +63,34 @@ Currently there are 3 ways to authenticate:
- `SAS Token` - Can be used by providing an `endpoint`, `connection_string` or `storage_account_url`. It is identified by presence of '?' in the url.
- `Workload Identity` - Can be used by providing an `endpoint` or `storage_account_url`. If the `use_workload_identity` parameter is set in the config, [workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications) is used for authentication.

### Data cache {#data-cache}

The `Azure` table engine supports data caching on local disk.
See filesystem cache configuration options and usage in this [section](/docs/en/operations/storing-data.md/#using-local-cache).
Caching is keyed on the path and the ETag of the storage object, so ClickHouse will not read a stale cached version.

To enable caching, use the settings `filesystem_cache_name = '<name>'` and `enable_filesystem_cache = 1`.

```sql
SELECT *
FROM azureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', 'test_container', 'test_table', 'CSV')
SETTINGS filesystem_cache_name = 'cache_for_azure', enable_filesystem_cache = 1;
```

There are two ways to define the cache in the configuration file:

1. Add the following section to the ClickHouse configuration file:

``` xml
<clickhouse>
    <filesystem_caches>
        <cache_for_azure>
            <path>path to cache directory</path>
            <max_size>10Gi</max_size>
        </cache_for_azure>
    </filesystem_caches>
</clickhouse>
```

2. Reuse a cache configuration (and therefore cache storage) from the ClickHouse `storage_configuration` section, [described here](/docs/en/operations/storing-data.md/#using-local-cache) and sketched below.
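A minimal sketch of option 2, assuming the object storage is already declared as a disk under `storage_configuration`; the disk name, path and credentials below are placeholders and not part of this commit, so check the storing-data page linked above for the authoritative layout:

```xml
<clickhouse>
    <storage_configuration>
        <disks>
            <!-- Placeholder Azure object storage disk. -->
            <blob_storage_disk>
                <type>azure_blob_storage</type>
                <storage_account_url>http://azurite1:10000/devstoreaccount1</storage_account_url>
                <container_name>test_container</container_name>
                <account_name>devstoreaccount1</account_name>
                <account_key>REPLACE_WITH_ACCOUNT_KEY</account_key>
            </blob_storage_disk>
            <!-- A cache layered on top of the disk; the cache name can then be
                 referenced in queries via SETTINGS filesystem_cache_name = 'cache_for_azure'. -->
            <cache_for_azure>
                <type>cache</type>
                <disk>blob_storage_disk</disk>
                <path>/var/lib/clickhouse/filesystem_caches/azure/</path>
                <max_size>10Gi</max_size>
            </cache_for_azure>
        </disks>
    </storage_configuration>
</clickhouse>
```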
## See also
@@ -48,6 +48,10 @@ Using named collections:
CREATE TABLE deltalake ENGINE=DeltaLake(deltalake_conf, filename = 'test_table')
```

### Data cache {#data-cache}

The `DeltaLake` table engine and table function support data caching in the same way as the `S3`, `AzureBlobStorage` and `HDFS` storages. See [here](../../../engines/table-engines/integrations/s3.md#data-cache).

## See also

- [deltaLake table function](../../../sql-reference/table-functions/deltalake.md)
@@ -6,7 +6,7 @@ sidebar_label: Iceberg

# Iceberg Table Engine

-This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure and locally stored tables.
+This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure, HDFS and locally stored tables.

## Create Table

@@ -19,13 +19,16 @@ CREATE TABLE iceberg_table_s3
CREATE TABLE iceberg_table_azure
ENGINE = IcebergAzure(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression])

CREATE TABLE iceberg_table_hdfs
ENGINE = IcebergHDFS(path_to_table, [,format] [,compression_method])

CREATE TABLE iceberg_table_local
ENGINE = IcebergLocal(path_to_table, [,format] [,compression_method])
```

**Engine arguments**

-Description of the arguments coincides with description of arguments in engines `S3`, `AzureBlobStorage` and `File` correspondingly.
+Description of the arguments coincides with description of arguments in engines `S3`, `AzureBlobStorage`, `HDFS` and `File` correspondingly.
`format` stands for the format of data files in the Iceberg table.

Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md)

@@ -60,6 +63,10 @@ CREATE TABLE iceberg_table ENGINE=IcebergS3(iceberg_conf, filename = 'test_table

Table engine `Iceberg` is an alias to `IcebergS3` now.

### Data cache {#data-cache}

The `Iceberg` table engine and table function support data caching in the same way as the `S3`, `AzureBlobStorage` and `HDFS` storages. See [here](../../../engines/table-engines/integrations/s3.md#data-cache).

## See also

- [iceberg table function](/docs/en/sql-reference/table-functions/iceberg.md)
@@ -4,12 +4,8 @@ sidebar_position: 138
sidebar_label: MySQL
---

import CloudAvailableBadge from '@theme/badges/CloudAvailableBadge';

# MySQL Table Engine

<CloudAvailableBadge />

The MySQL engine allows you to perform `SELECT` and `INSERT` queries on data that is stored on a remote MySQL server.

## Creating a Table {#creating-a-table}
@@ -26,6 +26,7 @@ SELECT * FROM s3_engine_table LIMIT 2;
│ two │ 2 │
└──────┴───────┘
```

## Create Table {#creating-a-table}

``` sql
@@ -43,6 +44,37 @@ CREATE TABLE s3_engine_table (name String, value UInt32)
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will auto-detect compression by file extension.

### Data cache {#data-cache}

The `S3` table engine supports data caching on local disk.
See filesystem cache configuration options and usage in this [section](/docs/en/operations/storing-data.md/#using-local-cache).
Caching is keyed on the path and the ETag of the storage object, so ClickHouse will not read a stale cached version.

To enable caching, use the settings `filesystem_cache_name = '<name>'` and `enable_filesystem_cache = 1`.

```sql
SELECT *
FROM s3('http://minio:10000/clickhouse//test_3.csv', 'minioadmin', 'minioadminpassword', 'CSV')
SETTINGS filesystem_cache_name = 'cache_for_s3', enable_filesystem_cache = 1;
```

There are two ways to define the cache in the configuration file:

1. Add the following section to the ClickHouse configuration file:

``` xml
<clickhouse>
    <filesystem_caches>
        <cache_for_s3>
            <path>path to cache directory</path>
            <max_size>10Gi</max_size>
        </cache_for_s3>
    </filesystem_caches>
</clickhouse>
```

2. Reuse a cache configuration (and therefore cache storage) from the ClickHouse `storage_configuration` section, [described here](/docs/en/operations/storing-data.md/#using-local-cache). A quick way to verify the cache is being used is shown below.
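To check that the cache named above is actually being hit, one option is to look at the query's profile events after running the cached read twice. This is a sketch: `SYSTEM FLUSH LOGS` usage and the `CachedReadBufferReadFrom*` event names are assumptions to verify against your ClickHouse version.

```sql
-- Run the cached read twice; the second run should be served mostly from the local cache.
SELECT count()
FROM s3('http://minio:10000/clickhouse//test_3.csv', 'minioadmin', 'minioadminpassword', 'CSV')
SETTINGS filesystem_cache_name = 'cache_for_s3', enable_filesystem_cache = 1;

SYSTEM FLUSH LOGS;

-- Compare bytes served from the cache vs. from the remote object storage.
SELECT
    query_id,
    ProfileEvents['CachedReadBufferReadFromCacheBytes']  AS bytes_from_cache,
    ProfileEvents['CachedReadBufferReadFromSourceBytes'] AS bytes_from_source
FROM system.query_log
WHERE type = 'QueryFinish' AND query ILIKE '%test_3.csv%'
ORDER BY event_time DESC
LIMIT 5;
```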
### PARTITION BY

`PARTITION BY` — Optional. In most cases you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).
@@ -31,6 +31,10 @@ The table must be enabled in the server configuration, see the `opentelemetry_sp

The tags or attributes are saved as two parallel arrays, containing the keys and values. Use [ARRAY JOIN](../sql-reference/statements/select/array-join.md) to work with them.

## Log-query-settings

ClickHouse allows you to log changes to query settings during query execution. When enabled, any modifications made to query settings will be recorded in the OpenTelemetry span log. This feature is particularly useful in production environments for tracking configuration changes that may affect query performance.
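A hedged sketch of how this can be observed — the `log_query_settings` setting is documented in the settings changes later in this diff, while the exact span attribute keys are version-dependent and not asserted here:

```sql
-- Force tracing for the session and run a query with explicitly changed settings.
SET opentelemetry_start_trace_probability = 1;

SELECT count()
FROM system.numbers
LIMIT 1000
SETTINGS max_threads = 2, log_query_settings = 1;

SYSTEM FLUSH LOGS;

-- The recorded spans (including attributes for the changed settings) land here.
SELECT operation_name, attribute
FROM system.opentelemetry_span_log
ORDER BY start_time_us DESC
LIMIT 10;
```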
## Integration with monitoring systems

At the moment, there is no ready-made tool that can export the tracing data from ClickHouse to a monitoring system.
@@ -413,14 +413,6 @@ Default value: 0

Experimental data deduplication for SELECT queries based on part UUIDs

## allow_experimental_refreshable_materialized_view {#allow_experimental_refreshable_materialized_view}

Type: Bool

Default value: 0

Allow refreshable materialized views (CREATE MATERIALIZED VIEW \<name\> REFRESH ...).

## allow_experimental_shared_set_join {#allow_experimental_shared_set_join}

Type: Bool

@@ -540,6 +532,14 @@ Default value: 0

Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()

## log_query_settings {#log-query-settings}

Type: Bool

Default value: 1

Log query settings into the query_log and opentelemetry_span_log.

## allow_nondeterministic_mutations {#allow_nondeterministic_mutations}

Type: Bool
@@ -9382,7 +9382,7 @@ Type: Bool

Default value: 0

-Traverse shadow directory when query system.remote_data_paths
+Traverse frozen data (shadow directory) in addition to actual table data when query `system.remote_data_paths`.

## union_default_mode {#union_default_mode}
@@ -9729,6 +9729,10 @@ Default value: 15

The heartbeat interval in seconds to indicate watch query is alive.

## enforce_strict_identifier_format

If enabled, only allow identifiers containing alphanumeric characters and underscores.
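An illustrative sketch of the intended effect, inferred from the one-line description above rather than from tested behavior (treat it as an assumption):

```sql
SET enforce_strict_identifier_format = 1;

-- Fine: identifiers are alphanumeric plus underscores.
CREATE TABLE t_ok (id UInt64, user_name String) ENGINE = MergeTree ORDER BY id;

-- Expected to be rejected: the quoted identifier contains a dash.
CREATE TABLE `t-bad` (id UInt64) ENGINE = MergeTree ORDER BY id;
```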
## workload {#workload}

Type: String
@@ -261,9 +261,10 @@ windowFunnel(window, [mode, [mode, ... ]])(timestamp, cond1, cond2, ..., condN)

- `window` — Length of the sliding window, it is the time interval between the first and the last condition. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond1 <= timestamp of cond2 <= ... <= timestamp of condN <= timestamp of cond1 + window`.
- `mode` — It is an optional argument. One or more modes can be set.
-    - `'strict_deduplication'` — If the same condition holds for the sequence of events, then such repeating event interrupts further processing.
+    - `'strict_deduplication'` — If the same condition holds for the sequence of events, then such repeating event interrupts further processing. Note: it may work unexpectedly if several conditions hold for the same event.
    - `'strict_order'` — Don't allow interventions of other events. E.g. in the case of `A->B->D->C`, it stops finding `A->B->C` at the `D` and the max event level is 2 (see the sketch below).
    - `'strict_increase'` — Apply conditions only to events with strictly increasing timestamps.
    - `'strict_once'` — Count each event only once in the chain even if it meets the condition several times
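To make the `'strict_order'` bullet above concrete, here is a small self-contained sketch; the table and values are illustrative, not part of the commit:

```sql
CREATE TABLE funnel_events (uid UInt64, ts DateTime, event String)
ENGINE = MergeTree ORDER BY (uid, ts);

INSERT INTO funnel_events VALUES
    (1, '2024-01-01 00:00:01', 'A'),
    (1, '2024-01-01 00:00:02', 'B'),
    (1, '2024-01-01 00:00:03', 'D'),
    (1, '2024-01-01 00:00:04', 'C');

-- Default mode matches A->B->C (level 3); strict_order stops at the unrelated 'D' (level 2).
SELECT
    windowFunnel(3600)(ts, event = 'A', event = 'B', event = 'C')                 AS level_default,
    windowFunnel(3600, 'strict_order')(ts, event = 'A', event = 'B', event = 'C') AS level_strict_order
FROM funnel_events
WHERE uid = 1;
```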
**Returned value**
@@ -490,7 +491,7 @@ Where:

## uniqUpTo(N)(x)

Calculates the number of different values of the argument up to a specified limit, `N`. If the number of different argument values is greater than `N`, this function returns `N` + 1, otherwise it calculates the exact value.

Recommended for use with small `N`s, up to 10. The maximum value of `N` is 100.
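A small illustration of the `N` + 1 capping described above (the values are chosen arbitrarily):

```sql
SELECT
    uniqUpTo(4)(number % 3)  AS exact_when_at_most_n,  -- 3 distinct values -> returns 3
    uniqUpTo(4)(number % 10) AS capped_when_above_n    -- 10 distinct values -> returns 5 (N + 1)
FROM numbers(100);
```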
@@ -522,7 +523,7 @@ This function behaves the same as [sumMap](../../sql-reference/aggregate-functio
- `keys`: [Array](../data-types/array.md) of keys.
- `values`: [Array](../data-types/array.md) of values.

**Returned Value**

- Returns a tuple of two arrays: keys in sorted order, and values summed for the corresponding keys.

@@ -539,10 +540,10 @@ CREATE TABLE sum_map
)
ENGINE = Log

INSERT INTO sum_map VALUES
    ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]),
    ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]),
    ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]),
    ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]);
```

@@ -572,7 +573,7 @@ This function behaves the same as [sumMap](../../sql-reference/aggregate-functio
- `keys`: [Array](../data-types/array.md) of keys.
- `values`: [Array](../data-types/array.md) of values.

**Returned Value**

- Returns a tuple of two arrays: keys in sorted order, and values summed for the corresponding keys.

@@ -591,10 +592,10 @@ CREATE TABLE sum_map
)
ENGINE = Log

INSERT INTO sum_map VALUES
    ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]),
    ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]),
    ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]),
    ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]);
```
@ -1,190 +0,0 @@
|
||||
---
|
||||
slug: /en/sql-reference/ansi
|
||||
sidebar_position: 40
|
||||
sidebar_label: ANSI Compatibility
|
||||
title: "ANSI SQL Compatibility of ClickHouse SQL Dialect"
|
||||
---
|
||||
|
||||
:::note
|
||||
This article relies on Table 38, “Feature taxonomy and definition for mandatory features”, Annex F of [ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8).
|
||||
:::
|
||||
|
||||
## Differences in Behaviour
|
||||
|
||||
The following table lists cases when query feature works in ClickHouse, but behaves not as specified in ANSI SQL.
|
||||
|
||||
| Feature ID | Feature Name | Difference |
|
||||
|------------|-----------------------------|-----------------------------------------------------------------------------------------------------------|
|
||||
| E011 | Numeric data types | Numeric literal with period is interpreted as approximate (`Float64`) instead of exact (`Decimal`) |
|
||||
| E051-05 | Select items can be renamed | Item renames have a wider visibility scope than just the SELECT result |
|
||||
| E141-01 | NOT NULL constraints | `NOT NULL` is implied for table columns by default |
|
||||
| E011-04 | Arithmetic operators | ClickHouse overflows instead of checked arithmetic and changes the result data type based on custom rules |
|
||||
|
||||
## Feature Status
|
||||
|
||||
| Feature ID | Feature Name | Status | Comment |
|
||||
|------------|--------------------------------------------------------------------------------------------------------------------------|----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| **E011** | **Numeric data types** | <span class="text-warning">Partial</span> | |
|
||||
| E011-01 | INTEGER and SMALLINT data types | <span class="text-success">Yes</span> | |
|
||||
| E011-02 | REAL, DOUBLE PRECISION and FLOAT data types data types | <span class="text-success">Yes</span> | |
|
||||
| E011-03 | DECIMAL and NUMERIC data types | <span class="text-success">Yes</span> | |
|
||||
| E011-04 | Arithmetic operators | <span class="text-success">Yes</span> | |
|
||||
| E011-05 | Numeric comparison | <span class="text-success">Yes</span> | |
|
||||
| E011-06 | Implicit casting among the numeric data types | <span class="text-danger">No</span> | ANSI SQL allows arbitrary implicit cast between numeric types, while ClickHouse relies on functions having multiple overloads instead of implicit cast |
|
||||
| **E021** | **Character string types** | <span class="text-warning">Partial</span> | |
|
||||
| E021-01 | CHARACTER data type | <span class="text-success">Yes</span> | |
|
||||
| E021-02 | CHARACTER VARYING data type | <span class="text-success">Yes</span> | |
|
||||
| E021-03 | Character literals | <span class="text-success">Yes</span> | |
|
||||
| E021-04 | CHARACTER_LENGTH function | <span class="text-warning">Partial</span> | No `USING` clause |
|
||||
| E021-05 | OCTET_LENGTH function | <span class="text-danger">No</span> | `LENGTH` behaves similarly |
|
||||
| E021-06 | SUBSTRING | <span class="text-warning">Partial</span> | No support for `SIMILAR` and `ESCAPE` clauses, no `SUBSTRING_REGEX` variant |
|
||||
| E021-07 | Character concatenation | <span class="text-warning">Partial</span> | No `COLLATE` clause |
|
||||
| E021-08 | UPPER and LOWER functions | <span class="text-success">Yes</span> | |
|
||||
| E021-09 | TRIM function | <span class="text-success">Yes</span> | |
|
||||
| E021-10 | Implicit casting among the fixed-length and variable-length character string types | <span class="text-warning">Partial</span> | ANSI SQL allows arbitrary implicit cast between string types, while ClickHouse relies on functions having multiple overloads instead of implicit cast |
|
||||
| E021-11 | POSITION function | <span class="text-warning">Partial</span> | No support for `IN` and `USING` clauses, no `POSITION_REGEX` variant |
|
||||
| E021-12 | Character comparison | <span class="text-success">Yes</span> | |
|
||||
| **E031** | **Identifiers** | <span class="text-warning">Partial</span>| |
|
||||
| E031-01 | Delimited identifiers | <span class="text-warning">Partial</span> | Unicode literal support is limited |
|
||||
| E031-02 | Lower case identifiers | <span class="text-success">Yes</span> | |
|
||||
| E031-03 | Trailing underscore | <span class="text-success">Yes</span> | |
|
||||
| **E051** | **Basic query specification** | <span class="text-warning">Partial</span>| |
|
||||
| E051-01 | SELECT DISTINCT | <span class="text-success">Yes</span> | |
|
||||
| E051-02 | GROUP BY clause | <span class="text-success">Yes</span> | |
|
||||
| E051-04 | GROUP BY can contain columns not in `<select list>` | <span class="text-success">Yes</span> | |
|
||||
| E051-05 | Select items can be renamed | <span class="text-success">Yes</span> | |
|
||||
| E051-06 | HAVING clause | <span class="text-success">Yes</span> | |
|
||||
| E051-07 | Qualified \* in select list | <span class="text-success">Yes</span> | |
|
||||
| E051-08 | Correlation name in the FROM clause | <span class="text-success">Yes</span> | |
|
||||
| E051-09 | Rename columns in the FROM clause | <span class="text-danger">No</span> | |
|
||||
| **E061** | **Basic predicates and search conditions** | <span class="text-warning">Partial</span> | |
|
||||
| E061-01 | Comparison predicate | <span class="text-success">Yes</span> | |
|
||||
| E061-02 | BETWEEN predicate | <span class="text-warning">Partial</span> | No `SYMMETRIC` and `ASYMMETRIC` clause |
|
||||
| E061-03 | IN predicate with list of values | <span class="text-success">Yes</span> | |
|
||||
| E061-04 | LIKE predicate | <span class="text-success">Yes</span> | |
|
||||
| E061-05 | LIKE predicate: ESCAPE clause | <span class="text-danger">No</span> | |
|
||||
| E061-06 | NULL predicate | <span class="text-success">Yes</span> | |
|
||||
| E061-07 | Quantified comparison predicate | <span class="text-danger">No</span> | |
|
||||
| E061-08 | EXISTS predicate | <span class="text-danger">No</span> | |
|
||||
| E061-09 | Subqueries in comparison predicate | <span class="text-success">Yes</span> | |
|
||||
| E061-11 | Subqueries in IN predicate | <span class="text-success">Yes</span> | |
|
||||
| E061-12 | Subqueries in quantified comparison predicate | <span class="text-danger">No</span> | |
|
||||
| E061-13 | Correlated subqueries | <span class="text-danger">No</span> | |
|
||||
| E061-14 | Search condition | <span class="text-success">Yes</span> | |
|
||||
| **E071** | **Basic query expressions** | <span class="text-warning">Partial</span> | |
|
||||
| E071-01 | UNION DISTINCT table operator | <span class="text-success">Yes</span> | |
|
||||
| E071-02 | UNION ALL table operator | <span class="text-success">Yes</span> | |
|
||||
| E071-03 | EXCEPT DISTINCT table operator | <span class="text-danger">No</span> | |
|
||||
| E071-05 | Columns combined via table operators need not have exactly the same data type | <span class="text-success">Yes</span> | |
|
||||
| E071-06 | Table operators in subqueries | <span class="text-success">Yes</span> | |
|
||||
| **E081** | **Basic privileges** | <span class="text-success">Yes</span> |
|
||||
| E081-01 | SELECT privilege at the table level | <span class="text-success">Yes</span> |
|
||||
| E081-02 | DELETE privilege | |
|
||||
| E081-03 | INSERT privilege at the table level | <span class="text-success">Yes</span> |
|
||||
| E081-04 | UPDATE privilege at the table level | <span class="text-success">Yes</span> |
|
||||
| E081-05 | UPDATE privilege at the column level | |
|
||||
| E081-06 | REFERENCES privilege at the table level | | |
|
||||
| E081-07 | REFERENCES privilege at the column level | | |
|
||||
| E081-08 | WITH GRANT OPTION | <span class="text-success">Yes</span> | |
|
||||
| E081-09 | USAGE privilege | | |
|
||||
| E081-10 | EXECUTE privilege | | |
|
||||
| **E091** | **Set functions** |<span class="text-success">Yes</span> |
|
||||
| E091-01 | AVG | <span class="text-success">Yes</span> | |
|
||||
| E091-02 | COUNT | <span class="text-success">Yes</span> | |
|
||||
| E091-03 | MAX | <span class="text-success">Yes</span> | |
|
||||
| E091-04 | MIN | <span class="text-success">Yes</span> | |
|
||||
| E091-05 | SUM | <span class="text-success">Yes</span> | |
|
||||
| E091-06 | ALL quantifier | <span class="text-success">Yes</span> | |
|
||||
| E091-07 | DISTINCT quantifier | <span class="text-success">Yes</span> | Not all aggregate functions supported |
|
||||
| **E101** | **Basic data manipulation** | <span class="text-warning">Partial</span> | |
|
||||
| E101-01 | INSERT statement | <span class="text-success">Yes</span> | Note: primary key in ClickHouse does not imply the `UNIQUE` constraint |
|
||||
| E101-03 | Searched UPDATE statement | <span class="text-warning">Partial</span> | There’s an `ALTER UPDATE` statement for batch data modification |
|
||||
| E101-04 | Searched DELETE statement | <span class="text-warning">Partial</span> | There’s an `ALTER DELETE` statement for batch data removal |
|
||||
| **E111** | **Single row SELECT statement** | <span class="text-danger">No</span> | |
|
||||
| **E121** | **Basic cursor support** | <span class="text-danger">No</span> | |
|
||||
| E121-01 | DECLARE CURSOR | <span class="text-danger">No</span> | |
|
||||
| E121-02 | ORDER BY columns need not be in select list | <span class="text-success">Yes</span> | |
|
||||
| E121-03 | Value expressions in ORDER BY clause | <span class="text-success">Yes</span> | |
|
||||
| E121-04 | OPEN statement | <span class="text-danger">No</span> | |
|
||||
| E121-06 | Positioned UPDATE statement | <span class="text-danger">No</span> | |
|
||||
| E121-07 | Positioned DELETE statement | <span class="text-danger">No</span> | |
|
||||
| E121-08 | CLOSE statement | <span class="text-danger">No</span> | |
|
||||
| E121-10 | FETCH statement: implicit NEXT | <span class="text-danger">No</span> | |
|
||||
| E121-17 | WITH HOLD cursors | <span class="text-danger">No</span> | |
|
||||
| **E131** | **Null value support (nulls in lieu of values)** | <span class="text-success">Yes</span> | Some restrictions apply |
|
||||
| **E141** | **Basic integrity constraints** | <span class="text-warning">Partial</span> | |
|
||||
| E141-01 | NOT NULL constraints | <span class="text-success">Yes</span> | Note: `NOT NULL` is implied for table columns by default |
|
||||
| E141-02 | UNIQUE constraint of NOT NULL columns | <span class="text-danger">No</span> | |
|
||||
| E141-03 | PRIMARY KEY constraints | <span class="text-warning">Partial</span> | |
|
||||
| E141-04 | Basic FOREIGN KEY constraint with the NO ACTION default for both referential delete action and referential update action | <span class="text-danger">No</span> | |
|
||||
| E141-06 | CHECK constraint | <span class="text-success">Yes</span> | |
|
||||
| E141-07 | Column defaults | <span class="text-success">Yes</span> | |
|
||||
| E141-08 | NOT NULL inferred on PRIMARY KEY | <span class="text-success">Yes</span> | |
|
||||
| E141-10 | Names in a foreign key can be specified in any order | <span class="text-danger">No</span> | |
|
||||
| **E151** | **Transaction support** | <span class="text-danger">No</span> | |
|
||||
| E151-01 | COMMIT statement | <span class="text-danger">No</span> | |
|
||||
| E151-02 | ROLLBACK statement | <span class="text-danger">No</span> | |
|
||||
| **E152** | **Basic SET TRANSACTION statement** | <span class="text-danger">No</span> | |
|
||||
| E152-01 | SET TRANSACTION statement: ISOLATION LEVEL SERIALIZABLE clause | <span class="text-danger">No</span> | |
|
||||
| E152-02 | SET TRANSACTION statement: READ ONLY and READ WRITE clauses | <span class="text-danger">No</span> | |
|
||||
| **E153** | **Updatable queries with subqueries** | <span class="text-success">Yes</span> | |
|
||||
| **E161** | **SQL comments using leading double minus** | <span class="text-success">Yes</span> | |
|
||||
| **E171** | **SQLSTATE support** | <span class="text-danger">No</span> | |
|
||||
| **E182** | **Host language binding** | <span class="text-danger">No</span> | |
|
||||
| **F031** | **Basic schema manipulation** | <span class="text-warning">Partial</span>| |
|
||||
| F031-01 | CREATE TABLE statement to create persistent base tables | <span class="text-warning">Partial</span> | No `SYSTEM VERSIONING`, `ON COMMIT`, `GLOBAL`, `LOCAL`, `PRESERVE`, `DELETE`, `REF IS`, `WITH OPTIONS`, `UNDER`, `LIKE`, `PERIOD FOR` clauses and no support for user resolved data types |
|
||||
| F031-02 | CREATE VIEW statement | <span class="text-warning">Partial</span> | No `RECURSIVE`, `CHECK`, `UNDER`, `WITH OPTIONS` clauses and no support for user resolved data types |
|
||||
| F031-03 | GRANT statement | <span class="text-success">Yes</span> | |
|
||||
| F031-04 | ALTER TABLE statement: ADD COLUMN clause | <span class="text-success">Yes</span> | No support for `GENERATED` clause and system time period |
|
||||
| F031-13 | DROP TABLE statement: RESTRICT clause | <span class="text-danger">No</span> | |
|
||||
| F031-16 | DROP VIEW statement: RESTRICT clause | <span class="text-danger">No</span> | |
|
||||
| F031-19 | REVOKE statement: RESTRICT clause | <span class="text-danger">No</span> | |
|
||||
| **F041** | **Basic joined table** | <span class="text-warning">Partial</span> | |
|
||||
| F041-01 | Inner join (but not necessarily the INNER keyword) | <span class="text-success">Yes</span> | |
|
||||
| F041-02 | INNER keyword | <span class="text-success">Yes</span> | |
|
||||
| F041-03 | LEFT OUTER JOIN | <span class="text-success">Yes</span> | |
|
||||
| F041-04 | RIGHT OUTER JOIN | <span class="text-success">Yes</span> | |
|
||||
| F041-05 | Outer joins can be nested | <span class="text-success">Yes</span> | |
|
||||
| F041-07 | The inner table in a left or right outer join can also be used in an inner join | <span class="text-success">Yes</span> | |
|
||||
| F041-08 | All comparison operators are supported (rather than just =) | <span class="text-danger">No</span> | |
|
||||
| **F051** | **Basic date and time** | <span class="text-warning">Partial</span> | |
|
||||
| F051-01 | DATE data type (including support of DATE literal) | <span class="text-success">Yes</span> | |
|
||||
| F051-02 | TIME data type (including support of TIME literal) with fractional seconds precision of at least 0 | <span class="text-danger">No</span> | |
|
||||
| F051-03 | TIMESTAMP data type (including support of TIMESTAMP literal) with fractional seconds precision of at least 0 and 6 | <span class="text-success">Yes</span> | |
|
||||
| F051-04 | Comparison predicate on DATE, TIME, and TIMESTAMP data types | <span class="text-success">Yes</span> | |
|
||||
| F051-05 | Explicit CAST between datetime types and character string types | <span class="text-success">Yes</span> | |
|
||||
| F051-06 | CURRENT_DATE | <span class="text-danger">No</span> | `today()` is similar |
|
||||
| F051-07 | LOCALTIME | <span class="text-danger">No</span> | `now()` is similar |
|
||||
| F051-08 | LOCALTIMESTAMP | <span class="text-danger">No</span> | |
|
||||
| **F081** | **UNION and EXCEPT in views** | <span class="text-warning">Partial</span> | |
|
||||
| **F131** | **Grouped operations** | <span class="text-warning">Partial</span> | |
|
||||
| F131-01 | WHERE, GROUP BY, and HAVING clauses supported in queries with grouped views | <span class="text-success">Yes</span> | |
|
||||
| F131-02 | Multiple tables supported in queries with grouped views | <span class="text-success">Yes</span> | |
|
||||
| F131-03 | Set functions supported in queries with grouped views | <span class="text-success">Yes</span> | |
|
||||
| F131-04 | Subqueries with GROUP BY and HAVING clauses and grouped views | <span class="text-success">Yes</span> | |
|
||||
| F131-05 | Single row SELECT with GROUP BY and HAVING clauses and grouped views | <span class="text-danger">No</span> | |
|
||||
| **F181** | **Multiple module support** | <span class="text-danger">No</span> | |
|
||||
| **F201** | **CAST function** | <span class="text-success">Yes</span> | |
|
||||
| **F221** | **Explicit defaults** | <span class="text-danger">No</span> | |
|
||||
| **F261** | **CASE expression** | <span class="text-success">Yes</span> | |
|
||||
| F261-01 | Simple CASE | <span class="text-success">Yes</span> | |
|
||||
| F261-02 | Searched CASE | <span class="text-success">Yes</span> | |
|
||||
| F261-03 | NULLIF | <span class="text-success">Yes</span> | |
|
||||
| F261-04 | COALESCE | <span class="text-success">Yes</span> | |
|
||||
| **F311** | **Schema definition statement** | <span class="text-warning">Partial</span> | |
|
||||
| F311-01 | CREATE SCHEMA | <span class="text-warning">Partial</span> | See CREATE DATABASE |
|
||||
| F311-02 | CREATE TABLE for persistent base tables | <span class="text-success">Yes</span> | |
|
||||
| F311-03 | CREATE VIEW | <span class="text-success">Yes</span> | |
|
||||
| F311-04 | CREATE VIEW: WITH CHECK OPTION | <span class="text-danger">No</span> | |
|
||||
| F311-05 | GRANT statement | <span class="text-success">Yes</span> | |
|
||||
| **F471** | **Scalar subquery values** | <span class="text-success">Yes</span> | |
|
||||
| **F481** | **Expanded NULL predicate** | <span class="text-success">Yes</span> | |
|
||||
| **F812** | **Basic flagging** | <span class="text-danger">No</span> | |
|
||||
| **S011** | **Distinct data types** | | |
|
||||
| **T321** | **Basic SQL-invoked routines** | <span class="text-danger">No</span> | |
|
||||
| T321-01 | User-defined functions with no overloading | <span class="text-danger">No</span> | |
|
||||
| T321-02 | User-defined stored procedures with no overloading | <span class="text-danger">No</span> | |
|
||||
| T321-03 | Function invocation | <span class="text-danger">No</span> | |
|
||||
| T321-04 | CALL statement | <span class="text-danger">No</span> | |
|
||||
| T321-05 | RETURN statement | <span class="text-danger">No</span> | |
|
||||
| **T631** | **IN predicate with one list element** | <span class="text-success">Yes</span> | |
|
@@ -2933,7 +2933,42 @@ The same as ‘today() - 1’.

## timeSlot

-Rounds the time to the half hour.
+Round the time to the start of a half-an-hour length interval.

**Syntax**

```sql
timeSlot(time[, time_zone])
```

**Arguments**

- `time` — Time to round to the start of a half-an-hour length interval. [DateTime](../data-types/datetime.md)/[Date32](../data-types/date32.md)/[DateTime64](../data-types/datetime64.md).
- `time_zone` — A String type const value or an expression representing the time zone. [String](../data-types/string.md).

:::note
Though this function can take values of the extended types `Date32` and `DateTime64` as an argument, passing it a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results.
:::

**Return type**

- Returns the time rounded to the start of a half-an-hour length interval. [DateTime](../data-types/datetime.md).

**Example**

Query:

```sql
SELECT timeSlot(toDateTime('2000-01-02 03:04:05', 'UTC'));
```

Result:

```response
┌─timeSlot(toDateTime('2000-01-02 03:04:05', 'UTC'))─┐
│ 2000-01-02 03:00:00 │
└────────────────────────────────────────────────────┘
```

## toYYYYMM
@@ -5261,9 +5261,9 @@ SELECT toFixedString('foo', 8) AS s;

Result:

```response
-┌─s─────────────┬─s_cut─┐
-│ foo\0\0\0\0\0 │ foo │
-└───────────────┴───────┘
+┌─s─────────────┐
+│ foo\0\0\0\0\0 │
+└───────────────┘
```

## toStringCutToZero
@@ -6,7 +6,7 @@ sidebar_label: iceberg

# iceberg Table Function

-Provides a read-only table-like interface to Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure or locally stored.
+Provides a read-only table-like interface to Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure, HDFS or locally stored.

## Syntax

@@ -17,13 +17,16 @@ icebergS3(named_collection[, option=value [,..]])
icebergAzure(connection_string|storage_account_url, container_name, blobpath, [,account_name], [,account_key] [,format] [,compression_method])
icebergAzure(named_collection[, option=value [,..]])

icebergHDFS(path_to_table, [,format] [,compression_method])
icebergHDFS(named_collection[, option=value [,..]])

icebergLocal(path_to_table, [,format] [,compression_method])
icebergLocal(named_collection[, option=value [,..]])
```

## Arguments

-Description of the arguments coincides with description of arguments in table functions `s3`, `azureBlobStorage` and `file` correspondingly.
+Description of the arguments coincides with description of arguments in table functions `s3`, `azureBlobStorage`, `HDFS` and `file` correspondingly.
`format` stands for the format of data files in the Iceberg table.

**Returned value**

@@ -36,7 +39,7 @@ SELECT * FROM icebergS3('http://test.s3.amazonaws.com/clickhouse-bucket/test_tab
```

:::important
-ClickHouse currently supports reading v1 and v2 of the Iceberg format via the `icebergS3`, `icebergAzure` and `icebergLocal` table functions and `IcebergS3`, `icebergAzure` ans `icebergLocal` table engines.
+ClickHouse currently supports reading v1 and v2 of the Iceberg format via the `icebergS3`, `icebergAzure`, `icebergHDFS` and `icebergLocal` table functions and `IcebergS3`, `icebergAzure`, `IcebergHDFS` and `IcebergLocal` table engines.
:::

## Defining a named collection
@@ -1,2 +0,0 @@

# Just an empty yaml file. Keep it alone.
{}
@@ -33,7 +33,7 @@ sidebar_label: "Отличительные возможности ClickHouse"

## SQL support {#sql-support}

-ClickHouse supports a [declarative query language based on SQL](../sql-reference/index.md) that in [many cases](../sql-reference/ansi.mdx) coincides with the SQL standard.
+ClickHouse supports a declarative SQL query language.

Supported are [GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), subqueries in the [FROM](../sql-reference/statements/select/from.md), [IN](../sql-reference/operators/in.md) and [JOIN](../sql-reference/statements/select/join.md) clauses, [window functions](../sql-reference/window-functions/index.mdx), as well as scalar subqueries.
@@ -30,7 +30,7 @@ sidebar_label: "Настройки пользователей"
        <profile>profile_name</profile>

        <quota>default</quota>
-        <default_database>default<default_database>
+        <default_database>default</default_database>
        <databases>
            <database_name>
                <table_name>
@@ -1,10 +0,0 @@

---
slug: /ru/sql-reference/ansi
sidebar_position: 40
sidebar_label: ANSI Compatibility
title: "ANSI Compatibility"
---

import Content from '@site/docs/en/sql-reference/ansi.md';

<Content />
@@ -37,7 +37,7 @@ ClickHouse会使用服务器上一切可用的资源,从而以最自然的方

## SQL support {#zhi-chi-sql}

-ClickHouse supports a [declarative query language based on SQL](../sql-reference/index.md) that is in many cases identical to the [ANSI SQL standard](../sql-reference/ansi.md).
+ClickHouse supports a declarative query language based on SQL.

Supported queries include [GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), subqueries in [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md), [IN](../sql-reference/operators/in.md), and non-correlated subqueries.
@ -1,191 +0,0 @@
|
||||
---
|
||||
slug: /zh/sql-reference/ansi
|
||||
sidebar_position: 40
|
||||
sidebar_label: "ANSI\u517C\u5BB9\u6027"
|
||||
---
|
||||
|
||||
# ClickHouse SQL方言 与ANSI SQL的兼容性{#ansi-sql-compatibility-of-clickhouse-sql-dialect}
|
||||
|
||||
:::note
|
||||
本文参考Annex G所著的[ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8)标准.
|
||||
:::
|
||||
|
||||
## 行为差异 {#differences-in-behaviour}
|
||||
|
||||
下表列出了ClickHouse能够使用,但与ANSI SQL规定有差异的查询特性。
|
||||
|
||||
| 功能ID | 功能名称 | 差异 |
|
||||
| ------- | --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| E011 | 数值型数据类型 | 带小数点的数字被视为近似值 (`Float64`)而不是精确值 (`Decimal`) |
|
||||
| E051-05 | SELECT 的列可以重命名 | 字段重命名的作用范围不限于进行重命名的SELECT子查询(参考[表达式别名](https://clickhouse.com/docs/zh/sql-reference/syntax/#notes-on-usage)) |
|
||||
| E141-01 | NOT NULL(非空)约束 | ClickHouse表中每一列默认为`NOT NULL` |
|
||||
| E011-04 | 算术运算符 | ClickHouse在运算时会进行溢出,而不是四舍五入。此外会根据自定义规则修改结果数据类型(参考[溢出检查](https://clickhouse.com/docs/zh/sql-reference/data-types/decimal/#yi-chu-jian-cha)) |
|
||||
|
||||
## 功能状态 {#feature-status}
|
||||
|
||||
| 功能ID | 功能名称 | 状态 | 注释 |
|
||||
| -------- | ---------------------------------------------------------------------------------------- | ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| **E011** | **数值型数据类型** | **部分**{.text-warning} | |
|
||||
| E011-01 | INTEGER (整型)和SMALLINT (小整型)数据类型 | 是 {.text-success} | |
|
||||
| E011-02 | REAL (实数)、DOUBLE PRECISION (双精度浮点数)和FLOAT(单精度浮点数)数据类型数据类型 | 是 {.text-success} | |
|
||||
| E011-03 | DECIMAL (精确数字)和NUMERIC (精确数字)数据类型 | 是 {.text-success} | |
|
||||
| E011-04 | 算术运算符 | 是 {.text-success} | |
|
||||
| E011-05 | 数值比较 | 是 {.text-success} | |
|
||||
| E011-06 | 数值数据类型之间的隐式转换 | 否 {.text-danger} | ANSI SQL允许在数值类型之间进行任意隐式转换,而ClickHouse针对不同数据类型有对应的比较函数和类型转换函数 |
|
||||
| **E021** | **字符串类型** | **部分**{.text-warning} | |
|
||||
| E021-01 | CHARACTER (字符串)数据类型 | 是 {.text-success} | |
|
||||
| E021-02 | CHARACTER VARYING (可变字符串)数据类型 | 是 {.text-success} | |
|
||||
| E021-03 | 字符字面量 | 是 {.text-success} | |
|
||||
| E021-04 | CHARACTER_LENGTH 函数 | 部分 {.text-warning} | 不支持 `using` 从句 |
|
||||
| E021-05 | OCTET_LENGTH 函数 | 否 {.text-danger} | 使用 `LENGTH` 函数代替 |
|
||||
| E021-06 | SUBSTRING | 部分 {.text-warning} | 不支持 `SIMILAR` 和 `ESCAPE` 从句,没有`SUBSTRING_REGEX` 函数 |
|
||||
| E021-07 | 字符串拼接 | 部分 {.text-warning} | 不支持 `COLLATE` 从句 |
|
||||
| E021-08 | 大小写转换 | 是 {.text-success} | |
|
||||
| E021-09 | 裁剪字符串 | 是 {.text-success} | |
|
||||
| E021-10 | 固定长度和可变长度字符串类型之间的隐式转换 | 部分 {.text-warning} | ANSI SQL允许在数据类型之间进行任意隐式转换,而ClickHouse针对不同数据类型有对应的比较函数和类型转换函数 |
|
||||
| E021-11 | POSITION 函数 | 部分 {.text-warning} | 不支持 `IN` 和 `USING` 从句,不支持`POSITION_REGEX`函数 |
|
||||
| E021-12 | 字符串比较 | 是 {.text-success} | |
|
||||
| **E031** | **标识符** | **部分**{.text-warning} | |
|
||||
| E031-01 | 分隔标识符 | 部分 {.text-warning} | Unicode文字支持有限 |
|
||||
| E031-02 | 小写标识符 | 是 {.text-success} | |
|
||||
| E031-03 | 标识符最后加下划线 | 是 {.text-success} | |
|
||||
| **E051** | **基本查询规范** | **部分**{.text-warning} | |
|
||||
| E051-01 | SELECT DISTINCT | 是 {.text-success} | |
|
||||
| E051-02 | GROUP BY 从句 | 是 {.text-success} | |
|
||||
| E051-04 | GROUP BY 从句中的列可以包含不在 `<select list>`中出现的列 | 是 {.text-success} | |
|
||||
| E051-05 | SELECT 的列可以重命名 | 是 {.text-success} | |
|
||||
| E051-06 | HAVING 从句 | 是 {.text-success} | |
|
||||
| E051-07 | SELECT 选择的列中允许出现\* | 是 {.text-success} | |
|
||||
| E051-08 | FROM 从句中的关联名称 | 是 {.text-success} | |
|
||||
| E051-09 | 重命名 FROM 从句中的列 | 否 {.text-danger} | |
|
||||
| **E061** | **基本谓词和搜索条件** | **部分**{.text-warning} | |
|
||||
| E061-01 | 比较谓词 | 是 {.text-success} | |
|
||||
| E061-02 | BETWEEN 谓词 | 部分 {.text-warning} | 不支持 `SYMMETRIC` 和 `ASYMMETRIC` 从句 |
|
||||
| E061-03 | IN 谓词后可接值列表 | 是 {.text-success} | |
|
||||
| E061-04 | LIKE 谓词 | 是 {.text-success} | |
|
||||
| E061-05 | LIKE 谓词后接 ESCAPE 从句 | 否 {.text-danger} | |
|
||||
| E061-06 | NULL 谓词 | 是 {.text-success} | |
|
||||
| E061-07 | 量化比较谓词(ALL、SOME、ANY) | 否 {.text-danger} | |
|
||||
| E061-08 | EXISTS 谓词 | 否 {.text-danger} | |
|
||||
| E061-09 | 比较谓词中的子查询 | 是 {.text-success} | |
|
||||
| E061-11 | IN 谓词中的子查询 | 是 {.text-success} | |
|
||||
| E061-12 | 量化比较谓词(BETWEEN、IN、LIKE)中的子查询 | 否 {.text-danger} | |
|
||||
| E061-13 | 相关子查询 | 否 {.text-danger} | |
|
||||
| E061-14 | 搜索条件 | 是 {.text-success} | |
|
||||
| **E071** | **基本查询表达式** | **部分**{.text-warning} | |
|
||||
| E071-01 | UNION DISTINCT 表运算符 | 是 {.text-success} | |
|
||||
| E071-02 | UNION ALL 表运算符 | 是 {.text-success} | |
|
||||
| E071-03 | EXCEPT DISTINCT 表运算符 | 否 {.text-danger} | |
|
||||
| E071-05 | 通过表运算符组合的列不必具有完全相同的数据类型 | 是 {.text-success} | |
|
||||
| E071-06 | 子查询中的表运算符 | 是 {.text-success} | |
|
||||
| **E081** | **基本权限** | **是**{.text-success} | |
|
||||
| E081-01 | 表级别的SELECT(查询)权限 | 是 {.text-success} | |
|
||||
| E081-02 | DELETE(删除)权限 | 是 {.text-success} | |
|
||||
| E081-03 | 表级别的INSERT(插入)权限 | 是 {.text-success} | |
|
||||
| E081-04 | 表级别的UPDATE(更新)权限 | 是 {.text-success} | |
|
||||
| E081-05 | 列级别的UPDATE(更新)权限 | 是 {.text-success} | |
|
||||
| E081-06 | 表级别的REFERENCES(引用)权限 | 是 {.text-success} | |
|
||||
| E081-07 | 列级别的REFERENCES(引用)权限 | 是 {.text-success} | |
|
||||
| E081-08 | WITH GRANT OPTION | 是 {.text-success} | |
|
||||
| E081-09 | USAGE(使用)权限 | 是 {.text-success} | |
|
||||
| E081-10 | EXECUTE privilege | Yes {.text-success} | |
| **E091** | **Set functions** | **Yes**{.text-success} | |
| E091-01 | AVG | Yes {.text-success} | |
| E091-02 | COUNT | Yes {.text-success} | |
| E091-03 | MAX | Yes {.text-success} | |
| E091-04 | MIN | Yes {.text-success} | |
| E091-05 | SUM | Yes {.text-success} | |
| E091-06 | ALL quantifier | No {.text-danger} | |
| E091-07 | DISTINCT quantifier | Yes {.text-success} | Not all aggregate functions support it |
| **E101** | **Basic data manipulation** | **Partial**{.text-warning} | |
| E101-01 | INSERT statement | Yes {.text-success} | Note: the primary key in ClickHouse does not imply a `UNIQUE` constraint |
| E101-03 | Searched UPDATE statement | Partial {.text-warning} | The `ALTER UPDATE` statement is provided for batch data modification |
| E101-04 | Searched DELETE statement | Partial {.text-warning} | The `ALTER DELETE` statement is provided for batch data removal |
| **E111** | **Single row SELECT statement** | **No**{.text-danger} | |
| **E121** | **Basic cursor support** | **No**{.text-danger} | |
| E121-01 | DECLARE CURSOR | No {.text-danger} | |
| E121-02 | Columns referenced in ORDER BY need not be in the select list | Yes {.text-success} | |
| E121-03 | Value expressions in ORDER BY clause | Yes {.text-success} | |
| E121-04 | OPEN statement | No {.text-danger} | |
| E121-06 | Positioned UPDATE statement | No {.text-danger} | |
| E121-07 | Positioned DELETE statement | No {.text-danger} | |
| E121-08 | CLOSE statement | No {.text-danger} | |
| E121-10 | FETCH statement: implicit NEXT | No {.text-danger} | |
| E121-17 | WITH HOLD cursors | No {.text-danger} | |
| **E131** | **Null value support** | **Yes**{.text-success} | Some restrictions apply |
| **E141** | **Basic integrity constraints** | **Partial**{.text-warning} | |
| E141-01 | NOT NULL constraints | Yes {.text-success} | Note: by default, columns of a ClickHouse table carry an implicit `NOT NULL` constraint |
| E141-02 | UNIQUE constraints of NOT NULL columns | No {.text-danger} | |
| E141-03 | PRIMARY KEY constraints | Partial {.text-warning} | |
| E141-04 | Basic FOREIGN KEY constraint with the NO ACTION default for both referential delete and referential update actions | No {.text-danger} | |
| E141-06 | CHECK constraints | Yes {.text-success} | |
| E141-07 | Column defaults | Yes {.text-success} | |
| E141-08 | NOT NULL inferred on PRIMARY KEY | Yes {.text-success} | |
| E141-10 | Names in a foreign key can be specified in any order | No {.text-danger} | |
| **E151** | **Transaction support** | **No**{.text-danger} | |
| E151-01 | COMMIT statement | No {.text-danger} | |
| E151-02 | ROLLBACK statement | No {.text-danger} | |
| **E152** | **Basic SET TRANSACTION statement** | **No**{.text-danger} | |
| E152-01 | SET TRANSACTION statement: ISOLATION LEVEL SERIALIZABLE clause | No {.text-danger} | |
| E152-02 | SET TRANSACTION statement: READ ONLY and READ WRITE clauses | No {.text-danger} | |
| **E153** | **Updatable queries with subqueries** | **Yes**{.text-success} | |
| **E161** | **SQL comments using leading double minus ("--")** | **Yes**{.text-success} | |
| **E171** | **SQLSTATE support** | **No**{.text-danger} | |
| **E182** | **Host language binding** | **No**{.text-danger} | |
| **F031** | **Basic schema manipulation** | **Partial**{.text-warning} | |
| F031-01 | CREATE TABLE statement to create persistent base tables | Partial {.text-warning} | The `SYSTEM VERSIONING`, `ON COMMIT`, `GLOBAL`, `LOCAL`, `PRESERVE`, `DELETE`, `REF IS`, `WITH OPTIONS`, `UNDER`, `LIKE`, `PERIOD FOR` clauses and user-resolved data types are not supported |
| F031-02 | CREATE VIEW statement | Partial {.text-warning} | The `RECURSIVE`, `CHECK`, `UNDER`, `WITH OPTIONS` clauses and user-resolved data types are not supported |
| F031-03 | GRANT statement | Yes {.text-success} | |
| F031-04 | ALTER TABLE statement: ADD COLUMN clause | Yes {.text-success} | The `GENERATED` clause and the system time period are not supported |
| F031-13 | DROP TABLE statement: RESTRICT clause | No {.text-danger} | |
| F031-16 | DROP VIEW statement: RESTRICT clause | No {.text-danger} | |
| F031-19 | REVOKE statement: RESTRICT clause | No {.text-danger} | |
| **F041** | **Basic joined table** | **Partial**{.text-warning} | |
| F041-01 | Inner join (but not necessarily the INNER keyword) | Yes {.text-success} | |
| F041-02 | INNER keyword | Yes {.text-success} | |
| F041-03 | LEFT OUTER JOIN | Yes {.text-success} | |
| F041-04 | RIGHT OUTER JOIN | Yes {.text-success} | |
| F041-05 | Outer joins can be nested | Yes {.text-success} | |
| F041-07 | The inner table in a left or right outer join can also be used in an inner join | Yes {.text-success} | |
| F041-08 | All comparison operators are supported (rather than just =) | No {.text-danger} | |
| **F051** | **Basic date and time** | **Partial**{.text-warning} | |
| F051-01 | DATE data type (including support of the DATE literal) | Yes {.text-success} | |
| F051-02 | TIME data type (including support of the TIME literal) with fractional seconds precision of at least 0 | No {.text-danger} | |
| F051-03 | TIMESTAMP data type (including support of the TIMESTAMP literal) with fractional seconds precision of at least 0 and 6 | Yes {.text-success} | |
| F051-04 | Comparison predicate on DATE, TIME, and TIMESTAMP data types | Yes {.text-success} | |
| F051-05 | Explicit CAST between DateTime types and character string representations of time | Yes {.text-success} | |
| F051-06 | CURRENT_DATE | No {.text-danger} | Use `today()` instead |
| F051-07 | LOCALTIME | No {.text-danger} | Use `now()` instead |
| F051-08 | LOCALTIMESTAMP | No {.text-danger} | |
| **F081** | **UNION and EXCEPT in views** | **Partial**{.text-warning} | |
| **F131** | **Grouped operations** | **Partial**{.text-warning} | |
| F131-01 | WHERE, GROUP BY, and HAVING clauses supported in queries with grouped views | Yes {.text-success} | |
| F131-02 | Multiple tables supported in queries with grouped views | Yes {.text-success} | |
| F131-03 | Set functions supported in queries with grouped views | Yes {.text-success} | |
| F131-04 | Subqueries with `GROUP BY` and `HAVING` clauses and grouped views | Yes {.text-success} | |
| F131-05 | Single row SELECT with `GROUP BY` and `HAVING` clauses and grouped views | No {.text-danger} | |
| **F181** | **Multiple module support** | **No**{.text-danger} | |
| **F201** | **CAST function** | **Yes**{.text-success} | |
| **F221** | **Explicit defaults** | **No**{.text-danger} | |
| **F261** | **CASE expression** | **Yes**{.text-success} | |
| F261-01 | Simple CASE expression | Yes {.text-success} | |
| F261-02 | Searched CASE expression | Yes {.text-success} | |
| F261-03 | NULLIF | Yes {.text-success} | |
| F261-04 | COALESCE | Yes {.text-success} | |
| **F311** | **Schema definition statement** | **Partial**{.text-warning} | |
| F311-01 | CREATE SCHEMA | Partial {.text-warning} | See `CREATE DATABASE` |
| F311-02 | CREATE TABLE for persistent base tables | Yes {.text-success} | |
| F311-03 | CREATE VIEW | Yes {.text-success} | |
| F311-04 | CREATE VIEW: WITH CHECK OPTION | No {.text-danger} | |
| F311-05 | GRANT statement | Yes {.text-success} | |
| **F471** | **Scalar subquery values** | **Yes**{.text-success} | |
| **F481** | **Expanded NULL predicate** | **Yes**{.text-success} | |
| **F812** | **Basic flagging** | **No**{.text-danger} | |
| **S011** | **Distinct data types** | **No**{.text-danger} | |
| **T321** | **Basic SQL-invoked routines** | **No**{.text-danger} | |
| T321-01 | User-defined functions with no overloading | No {.text-danger} | |
| T321-02 | User-defined stored procedures with no overloading | No {.text-danger} | |
| T321-03 | Function invocation | No {.text-danger} | |
| T321-04 | CALL statement | No {.text-danger} | |
| T321-05 | RETURN statement | No {.text-danger} | |
| **T631** | **IN predicate with one list element** | **Yes**{.text-success} | |
@ -1496,6 +1496,8 @@ try
|
||||
|
||||
NamedCollectionFactory::instance().loadIfNot();
|
||||
|
||||
FileCacheFactory::instance().loadDefaultCaches(config());
|
||||
|
||||
/// Initialize main config reloader.
|
||||
std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");
|
||||
|
||||
|
@ -117,20 +117,20 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs)
|
||||
}
|
||||
|
||||
|
||||
void AuthenticationData::setPassword(const String & password_)
|
||||
void AuthenticationData::setPassword(const String & password_, bool validate)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case AuthenticationType::PLAINTEXT_PASSWORD:
|
||||
setPasswordHashBinary(Util::stringToDigest(password_));
|
||||
setPasswordHashBinary(Util::stringToDigest(password_), validate);
|
||||
return;
|
||||
|
||||
case AuthenticationType::SHA256_PASSWORD:
|
||||
setPasswordHashBinary(Util::encodeSHA256(password_));
|
||||
setPasswordHashBinary(Util::encodeSHA256(password_), validate);
|
||||
return;
|
||||
|
||||
case AuthenticationType::DOUBLE_SHA1_PASSWORD:
|
||||
setPasswordHashBinary(Util::encodeDoubleSHA1(password_));
|
||||
setPasswordHashBinary(Util::encodeDoubleSHA1(password_), validate);
|
||||
return;
|
||||
|
||||
case AuthenticationType::BCRYPT_PASSWORD:
|
||||
@ -149,12 +149,12 @@ void AuthenticationData::setPassword(const String & password_)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "setPassword(): authentication type {} not supported", toString(type));
|
||||
}
|
||||
|
||||
void AuthenticationData::setPasswordBcrypt(const String & password_, int workfactor_)
|
||||
void AuthenticationData::setPasswordBcrypt(const String & password_, int workfactor_, bool validate)
|
||||
{
|
||||
if (type != AuthenticationType::BCRYPT_PASSWORD)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot specify bcrypt password for authentication type {}", toString(type));
|
||||
|
||||
setPasswordHashBinary(Util::encodeBcrypt(password_, workfactor_));
|
||||
setPasswordHashBinary(Util::encodeBcrypt(password_, workfactor_), validate);
|
||||
}
|
||||
|
||||
String AuthenticationData::getPassword() const
|
||||
@ -165,7 +165,7 @@ String AuthenticationData::getPassword() const
|
||||
}
|
||||
|
||||
|
||||
void AuthenticationData::setPasswordHashHex(const String & hash)
|
||||
void AuthenticationData::setPasswordHashHex(const String & hash, bool validate)
|
||||
{
|
||||
Digest digest;
|
||||
digest.resize(hash.size() / 2);
|
||||
@ -179,7 +179,7 @@ void AuthenticationData::setPasswordHashHex(const String & hash)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read password hash in hex, check for valid characters [0-9a-fA-F] and length");
|
||||
}
|
||||
|
||||
setPasswordHashBinary(digest);
|
||||
setPasswordHashBinary(digest, validate);
|
||||
}
|
||||
|
||||
|
||||
@ -195,7 +195,7 @@ String AuthenticationData::getPasswordHashHex() const
|
||||
}
|
||||
|
||||
|
||||
void AuthenticationData::setPasswordHashBinary(const Digest & hash)
|
||||
void AuthenticationData::setPasswordHashBinary(const Digest & hash, bool validate)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
@ -217,7 +217,7 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
|
||||
|
||||
case AuthenticationType::DOUBLE_SHA1_PASSWORD:
|
||||
{
|
||||
if (hash.size() != 20)
|
||||
if (validate && hash.size() != 20)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Password hash for the 'DOUBLE_SHA1_PASSWORD' authentication type has length {} "
|
||||
"but must be exactly 20 bytes.", hash.size());
|
||||
@ -231,7 +231,7 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
|
||||
/// However the library we use to encode it requires hash string to be 64 characters long,
|
||||
/// so we also allow the hash of this length.
|
||||
|
||||
if (hash.size() != 59 && hash.size() != 60 && hash.size() != 64)
|
||||
if (validate && hash.size() != 59 && hash.size() != 60 && hash.size() != 64)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Password hash for the 'BCRYPT_PASSWORD' authentication type has length {} "
|
||||
"but must be 59 or 60 bytes.", hash.size());
|
||||
@ -240,10 +240,13 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
|
||||
resized.resize(64);
|
||||
|
||||
#if USE_BCRYPT
|
||||
/// Verify that it is a valid hash
|
||||
int ret = bcrypt_checkpw("", reinterpret_cast<const char *>(resized.data()));
|
||||
if (ret == -1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Could not decode the provided hash with 'bcrypt_hash'");
|
||||
if (validate)
|
||||
{
|
||||
/// Verify that it is a valid hash
|
||||
int ret = bcrypt_checkpw("", reinterpret_cast<const char *>(resized.data()));
|
||||
if (ret == -1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Could not decode the provided hash with 'bcrypt_hash'");
|
||||
}
|
||||
#endif
|
||||
|
||||
password_hash = hash;
|
||||
@ -385,7 +388,7 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
|
||||
}
|
||||
|
||||
|
||||
AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & query, ContextPtr context, bool check_password_rules)
|
||||
AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & query, ContextPtr context, bool validate)
|
||||
{
|
||||
if (query.type && query.type == AuthenticationType::NO_PASSWORD)
|
||||
return AuthenticationData();
|
||||
@ -431,7 +434,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
|
||||
if (!query.type && !context)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get default password type without context");
|
||||
|
||||
if (check_password_rules && !context)
|
||||
if (validate && !context)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot check password complexity rules without context");
|
||||
|
||||
if (query.type == AuthenticationType::BCRYPT_PASSWORD && !context)
|
||||
@ -448,13 +451,13 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
|
||||
|
||||
AuthenticationData auth_data(current_type);
|
||||
|
||||
if (check_password_rules)
|
||||
if (validate)
|
||||
context->getAccessControl().checkPasswordComplexityRules(value);
|
||||
|
||||
if (query.type == AuthenticationType::BCRYPT_PASSWORD)
|
||||
{
|
||||
int workfactor = context->getAccessControl().getBcryptWorkfactor();
|
||||
auth_data.setPasswordBcrypt(value, workfactor);
|
||||
auth_data.setPasswordBcrypt(value, workfactor, validate);
|
||||
return auth_data;
|
||||
}
|
||||
|
||||
@ -486,7 +489,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
|
||||
#endif
|
||||
}
|
||||
|
||||
auth_data.setPassword(value);
|
||||
auth_data.setPassword(value, validate);
|
||||
return auth_data;
|
||||
}
|
||||
|
||||
@ -498,11 +501,11 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
|
||||
|
||||
if (query.type == AuthenticationType::BCRYPT_PASSWORD)
|
||||
{
|
||||
auth_data.setPasswordHashBinary(AuthenticationData::Util::stringToDigest(value));
|
||||
auth_data.setPasswordHashBinary(AuthenticationData::Util::stringToDigest(value), validate);
|
||||
return auth_data;
|
||||
}
|
||||
|
||||
auth_data.setPasswordHashHex(value);
|
||||
auth_data.setPasswordHashHex(value, validate);
|
||||
|
||||
|
||||
if (query.type == AuthenticationType::SHA256_PASSWORD && args_size == 2)
|
||||
|
@ -31,17 +31,17 @@ public:
|
||||
AuthenticationType getType() const { return type; }
|
||||
|
||||
/// Sets the password and encrypt it using the authentication type set in the constructor.
|
||||
void setPassword(const String & password_);
|
||||
void setPassword(const String & password_, bool validate);
|
||||
|
||||
/// Returns the password. Allowed to use only for Type::PLAINTEXT_PASSWORD.
|
||||
String getPassword() const;
|
||||
|
||||
/// Sets the password as a string of hexadecimal digits.
|
||||
void setPasswordHashHex(const String & hash);
|
||||
void setPasswordHashHex(const String & hash, bool validate);
|
||||
String getPasswordHashHex() const;
|
||||
|
||||
/// Sets the password in binary form.
|
||||
void setPasswordHashBinary(const Digest & hash);
|
||||
void setPasswordHashBinary(const Digest & hash, bool validate);
|
||||
const Digest & getPasswordHashBinary() const { return password_hash; }
|
||||
|
||||
/// Sets the salt in String form.
|
||||
@ -49,7 +49,7 @@ public:
|
||||
String getSalt() const;
|
||||
|
||||
/// Sets the password using bcrypt hash with specified workfactor
|
||||
void setPasswordBcrypt(const String & password_, int workfactor_);
|
||||
void setPasswordBcrypt(const String & password_, int workfactor_, bool validate);
|
||||
|
||||
/// Sets the server name for authentication type LDAP.
|
||||
const String & getLDAPServerName() const { return ldap_server_name; }
|
||||
@ -77,7 +77,7 @@ public:
|
||||
friend bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs);
|
||||
friend bool operator !=(const AuthenticationData & lhs, const AuthenticationData & rhs) { return !(lhs == rhs); }
|
||||
|
||||
static AuthenticationData fromAST(const ASTAuthenticationData & query, ContextPtr context, bool check_password_rules);
|
||||
static AuthenticationData fromAST(const ASTAuthenticationData & query, ContextPtr context, bool validate);
|
||||
std::shared_ptr<ASTAuthenticationData> toAST() const;
|
||||
|
||||
struct Util
|
||||
|
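The hunks above thread an explicit `bool validate` flag through every password setter, so callers that load already-stored credentials (for example from a users config or replicated storage) can skip the strict length and complexity checks, while interactive `CREATE USER` paths keep them. Below is a minimal standalone sketch of that pattern, not ClickHouse code; `StoredCredential` and `hexToDigest` are illustrative names only.

```cpp
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>

using Digest = std::vector<std::uint8_t>;

/// Decode a hex string into bytes, throwing on malformed input.
Digest hexToDigest(const std::string & hex)
{
    if (hex.size() % 2 != 0)
        throw std::invalid_argument("hex string must have even length");
    Digest out(hex.size() / 2);
    for (std::size_t i = 0; i < out.size(); ++i)
        out[i] = static_cast<std::uint8_t>(std::stoul(hex.substr(2 * i, 2), nullptr, 16));
    return out;
}

struct StoredCredential
{
    Digest hash;

    /// `validate == false` mirrors the trusted-source path: accept the digest as-is.
    void setHashHex(const std::string & hex, std::size_t expected_size, bool validate)
    {
        Digest digest = hexToDigest(hex);
        if (validate && digest.size() != expected_size)
            throw std::invalid_argument("unexpected hash length");
        hash = std::move(digest);
    }
};

int main()
{
    StoredCredential cred;
    cred.setHashHex("00112233445566778899aabbccddeeff00112233", /*expected_size=*/ 20, /*validate=*/ true);
    return 0;
}
```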
@ -121,6 +121,7 @@ namespace
|
||||
bool allow_no_password,
|
||||
bool allow_plaintext_password)
|
||||
{
|
||||
const bool validate = true;
|
||||
auto user = std::make_shared<User>();
|
||||
user->setName(user_name);
|
||||
String user_config = "users." + user_name;
|
||||
@ -157,17 +158,17 @@ namespace
|
||||
if (has_password_plaintext)
|
||||
{
|
||||
user->authentication_methods.emplace_back(AuthenticationType::PLAINTEXT_PASSWORD);
|
||||
user->authentication_methods.back().setPassword(config.getString(user_config + ".password"));
|
||||
user->authentication_methods.back().setPassword(config.getString(user_config + ".password"), validate);
|
||||
}
|
||||
else if (has_password_sha256_hex)
|
||||
{
|
||||
user->authentication_methods.emplace_back(AuthenticationType::SHA256_PASSWORD);
|
||||
user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_sha256_hex"));
|
||||
user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_sha256_hex"), validate);
|
||||
}
|
||||
else if (has_password_double_sha1_hex)
|
||||
{
|
||||
user->authentication_methods.emplace_back(AuthenticationType::DOUBLE_SHA1_PASSWORD);
|
||||
user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_double_sha1_hex"));
|
||||
user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_double_sha1_hex"), validate);
|
||||
}
|
||||
else if (has_ldap)
|
||||
{
|
||||
|
@ -28,11 +28,36 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
constexpr size_t max_events = 32;
|
||||
constexpr size_t MAX_EVENTS = 32;
|
||||
|
||||
|
||||
template <typename T>
|
||||
void mergeEventsList(T & events_list, size_t prefix_size, bool prefix_sorted, bool suffix_sorted)
|
||||
{
|
||||
/// Either sort the whole container, or sort only the unsorted parts and merge the two ranges afterwards.
|
||||
if (!prefix_sorted && !suffix_sorted)
|
||||
std::stable_sort(std::begin(events_list), std::end(events_list));
|
||||
else
|
||||
{
|
||||
const auto begin = std::begin(events_list);
|
||||
const auto middle = std::next(begin, prefix_size);
|
||||
const auto end = std::end(events_list);
|
||||
|
||||
if (!prefix_sorted)
|
||||
std::stable_sort(begin, middle);
|
||||
|
||||
if (!suffix_sorted)
|
||||
std::stable_sort(middle, end);
|
||||
|
||||
std::inplace_merge(begin, middle, end);
|
||||
}
|
||||
}
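For context, here is a standalone illustration (using `std::vector` rather than the `PODArrayWithStackMemory` above) of the property `mergeEventsList` relies on: when the existing prefix and the appended suffix are each already sorted, a single `std::inplace_merge` suffices and a full re-sort is avoided.

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

template <typename T>
void mergeSortedRuns(std::vector<T> & v, std::size_t prefix_size, bool prefix_sorted, bool suffix_sorted)
{
    if (!prefix_sorted && !suffix_sorted)
    {
        std::stable_sort(v.begin(), v.end());
        return;
    }
    const auto middle = v.begin() + prefix_size;
    if (!prefix_sorted)
        std::stable_sort(v.begin(), middle);
    if (!suffix_sorted)
        std::stable_sort(middle, v.end());
    std::inplace_merge(v.begin(), middle, v.end());
}

int main()
{
    std::vector<int> events = {1, 3, 5, 2, 4, 6};   /// sorted prefix of size 3, then a sorted suffix
    mergeSortedRuns(events, 3, /*prefix_sorted=*/ true, /*suffix_sorted=*/ true);
    assert(std::is_sorted(events.begin(), events.end()));
    return 0;
}
```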
|
||||
|
||||
template <typename T>
|
||||
struct AggregateFunctionWindowFunnelData
|
||||
{
|
||||
static constexpr bool strict_once_enabled = false;
|
||||
|
||||
using TimestampEvent = std::pair<T, UInt8>;
|
||||
using TimestampEvents = PODArrayWithStackMemory<TimestampEvent, 64>;
|
||||
|
||||
@ -66,24 +91,7 @@ struct AggregateFunctionWindowFunnelData
|
||||
|
||||
events_list.insert(std::begin(other.events_list), std::end(other.events_list));
|
||||
|
||||
/// either sort whole container or do so partially merging ranges afterwards
|
||||
if (!sorted && !other.sorted)
|
||||
std::stable_sort(std::begin(events_list), std::end(events_list));
|
||||
else
|
||||
{
|
||||
const auto begin = std::begin(events_list);
|
||||
const auto middle = std::next(begin, size);
|
||||
const auto end = std::end(events_list);
|
||||
|
||||
if (!sorted)
|
||||
std::stable_sort(begin, middle);
|
||||
|
||||
if (!other.sorted)
|
||||
std::stable_sort(middle, end);
|
||||
|
||||
std::inplace_merge(begin, middle, end);
|
||||
}
|
||||
|
||||
mergeEventsList(events_list, size, sorted, other.sorted);
|
||||
sorted = true;
|
||||
}
|
||||
|
||||
@ -133,6 +141,131 @@ struct AggregateFunctionWindowFunnelData
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct AggregateFunctionWindowFunnelStrictOnceData
|
||||
{
|
||||
static constexpr bool strict_once_enabled = true;
|
||||
struct TimestampEvent
|
||||
{
|
||||
T timestamp;
|
||||
UInt8 event_type;
|
||||
UInt64 unique_id;
|
||||
|
||||
TimestampEvent(T timestamp_, UInt8 event_type_, UInt64 unique_id_)
|
||||
: timestamp(timestamp_), event_type(event_type_), unique_id(unique_id_) {}
|
||||
|
||||
bool operator<(const TimestampEvent & other) const
|
||||
{
|
||||
return std::tie(timestamp, event_type, unique_id) < std::tie(other.timestamp, other.event_type, other.unique_id);
|
||||
}
|
||||
|
||||
bool operator<=(const TimestampEvent & other) const
|
||||
{
|
||||
return std::tie(timestamp, event_type, unique_id) <= std::tie(other.timestamp, other.event_type, other.unique_id);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
using TimestampEvents = PODArrayWithStackMemory<TimestampEvent, 64>;
|
||||
TimestampEvents events_list;
|
||||
|
||||
/// Next unique identifier for events
|
||||
/// Used to distinguish events with the same timestamp that match several conditions.
|
||||
UInt64 next_unique_id = 1;
|
||||
bool sorted = true;
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
return events_list.size();
|
||||
}
|
||||
|
||||
void advanceId()
|
||||
{
|
||||
++next_unique_id;
|
||||
}
|
||||
|
||||
void add(T timestamp, UInt8 event_type)
|
||||
{
|
||||
TimestampEvent new_event(timestamp, event_type, next_unique_id);
|
||||
/// Check if the new event maintains the sorted order
|
||||
if (sorted && !events_list.empty())
|
||||
sorted = events_list.back() <= new_event;
|
||||
events_list.push_back(new_event);
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionWindowFunnelStrictOnceData & other)
|
||||
{
|
||||
if (other.events_list.empty())
|
||||
return;
|
||||
|
||||
const auto current_size = events_list.size();
|
||||
|
||||
UInt64 new_next_unique_id = next_unique_id;
|
||||
events_list.reserve(current_size + other.events_list.size());
|
||||
for (auto other_event : other.events_list)
|
||||
{
|
||||
/// Assign unique IDs to the new events to prevent conflicts
|
||||
other_event.unique_id += next_unique_id;
|
||||
new_next_unique_id = std::max(new_next_unique_id, other_event.unique_id + 1);
|
||||
events_list.push_back(other_event);
|
||||
}
|
||||
next_unique_id = new_next_unique_id;
|
||||
|
||||
mergeEventsList(events_list, current_size, sorted, other.sorted);
|
||||
|
||||
sorted = true;
|
||||
}
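A simplified standalone model of the id handling in the `merge()` above: events arriving from another partial aggregation state carry their own id sequence, so they are shifted by this state's `next_unique_id` to keep identifiers globally unique after the merge. Types are reduced to the bare minimum and are not the real ones.

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

struct Event { std::uint64_t unique_id; };

struct State
{
    std::vector<Event> events;
    std::uint64_t next_unique_id = 1;

    void merge(const State & other)
    {
        std::uint64_t new_next = next_unique_id;
        for (Event e : other.events)
        {
            e.unique_id += next_unique_id;                      /// shift the other state's ids
            new_next = std::max(new_next, e.unique_id + 1);
            events.push_back(e);
        }
        next_unique_id = new_next;
    }
};

int main()
{
    State a; a.events = {{1}, {2}}; a.next_unique_id = 3;
    State b; b.events = {{1}};      b.next_unique_id = 2;
    a.merge(b);
    /// The event that had id 1 in `b` becomes id 4 here, so it cannot collide with a's ids.
    assert(a.events.back().unique_id == 4 && a.next_unique_id == 5);
    return 0;
}
```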
|
||||
|
||||
void sort()
|
||||
{
|
||||
if (!sorted)
|
||||
{
|
||||
std::stable_sort(std::begin(events_list), std::end(events_list));
|
||||
sorted = true;
|
||||
}
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeBinary(sorted, buf);
|
||||
writeBinary(events_list.size(), buf);
|
||||
|
||||
for (const auto & event : events_list)
|
||||
{
|
||||
writeBinary(event.timestamp, buf);
|
||||
writeBinary(event.event_type, buf);
|
||||
writeBinary(event.unique_id, buf);
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
readBinary(sorted, buf);
|
||||
|
||||
size_t events_size;
|
||||
readBinary(events_size, buf);
|
||||
|
||||
if (events_size > 100'000'000) /// Arbitrary limit to prevent excessive memory allocation
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large size of the state of windowFunnel");
|
||||
|
||||
events_list.clear();
|
||||
events_list.reserve(events_size);
|
||||
|
||||
T timestamp;
|
||||
UInt8 event_type;
|
||||
UInt64 unique_id = 0;
|
||||
|
||||
for (size_t i = 0; i < events_size; ++i)
|
||||
{
|
||||
readBinary(timestamp, buf);
|
||||
readBinary(event_type, buf);
|
||||
readBinary(unique_id, buf);
|
||||
next_unique_id = std::max(next_unique_id, unique_id + 1);
|
||||
events_list.emplace_back(timestamp, event_type, unique_id);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/** Calculates the max event level in a sliding window.
|
||||
* The max size of events is 32, that's enough for funnel analytics
|
||||
*
|
||||
@ -160,22 +293,15 @@ private:
|
||||
/// The level path must be 1---2---3---...---check_events_size, find the max event level that satisfied the path in the sliding window.
|
||||
/// If found, returns the max event level, else return 0.
|
||||
/// The algorithm works in O(n) time, but the overall function works in O(n * log(n)) due to sorting.
|
||||
UInt8 getEventLevel(Data & data) const
|
||||
UInt8 getEventLevelNonStrictOnce(const AggregateFunctionWindowFunnelData<T>::TimestampEvents & events_list) const
|
||||
{
|
||||
if (data.size() == 0)
|
||||
return 0;
|
||||
if (!strict_order && events_size == 1)
|
||||
return 1;
|
||||
|
||||
data.sort();
|
||||
|
||||
/// events_timestamp stores the timestamps of the first and previous i-th level events that happened within the time window
|
||||
std::vector<std::optional<std::pair<UInt64, UInt64>>> events_timestamp(events_size);
|
||||
bool first_event = false;
|
||||
for (size_t i = 0; i < data.events_list.size(); ++i)
|
||||
for (size_t i = 0; i < events_list.size(); ++i)
|
||||
{
|
||||
const T & timestamp = data.events_list[i].first;
|
||||
const auto & event_idx = data.events_list[i].second - 1;
|
||||
const T & timestamp = events_list[i].first;
|
||||
const auto & event_idx = events_list[i].second - 1;
|
||||
if (strict_order && event_idx == -1)
|
||||
{
|
||||
if (first_event)
|
||||
@ -189,7 +315,7 @@ private:
|
||||
}
|
||||
else if (strict_deduplication && events_timestamp[event_idx].has_value())
|
||||
{
|
||||
return data.events_list[i - 1].second;
|
||||
return events_list[i - 1].second;
|
||||
}
|
||||
else if (strict_order && first_event && !events_timestamp[event_idx - 1].has_value())
|
||||
{
|
||||
@ -222,6 +348,126 @@ private:
|
||||
return 0;
|
||||
}
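To make the sliding-window logic easier to follow, here is a compact standalone model of the non-strict level computation: events are (timestamp, condition) pairs, and for each level we remember the start timestamp of a candidate chain that has reached it. This is a simplification of `getEventLevelNonStrictOnce` under the default options (no `strict_order`, `strict_deduplication`, or `strict_increase` handling), with assumed toy inputs.

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <limits>
#include <utility>
#include <vector>

using Event = std::pair<std::uint64_t, std::uint32_t>; /// (timestamp, 1-based condition index)

std::uint32_t funnelLevel(std::vector<Event> events, std::uint64_t window, std::uint32_t conditions)
{
    std::sort(events.begin(), events.end());
    const std::uint64_t unset = std::numeric_limits<std::uint64_t>::max();
    /// first_ts[i] is the start timestamp of a candidate chain that has reached level i + 1.
    std::vector<std::uint64_t> first_ts(conditions, unset);
    std::uint32_t level = 0;
    for (const auto & [ts, cond] : events)
    {
        const std::uint32_t idx = cond - 1;
        if (idx == 0)
            first_ts[0] = ts;                       /// a later first-level event restarts the candidate chain
        else if (first_ts[idx - 1] != unset && ts <= first_ts[idx - 1] + window)
            first_ts[idx] = first_ts[idx - 1];      /// extend the chain, keeping its start timestamp
        if (first_ts[idx] != unset)
            level = std::max<std::uint32_t>(level, idx + 1);
    }
    return level;
}

int main()
{
    /// Conditions: 1 = visit, 2 = add to cart, 3 = purchase; window of 10 time units.
    std::vector<Event> events = {{1, 1}, {5, 2}, {20, 3}};
    std::cout << funnelLevel(events, 10, 3) << '\n'; /// prints 2: the purchase falls outside the window
    return 0;
}
```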
|
||||
|
||||
UInt8 getEventLevelStrictOnce(const AggregateFunctionWindowFunnelStrictOnceData<T>::TimestampEvents & events_list) const
|
||||
{
|
||||
/// Stores the timestamps of the first and last i-th level events that happened within the time window
|
||||
struct EventMatchTimeWindow
|
||||
{
|
||||
UInt64 first_timestamp;
|
||||
UInt64 last_timestamp;
|
||||
std::array<UInt64, MAX_EVENTS> event_path;
|
||||
|
||||
EventMatchTimeWindow() = default;
|
||||
EventMatchTimeWindow(UInt64 first_ts, UInt64 last_ts)
|
||||
: first_timestamp(first_ts), last_timestamp(last_ts) {}
|
||||
};
|
||||
|
||||
/// We track all possible event sequences up to the current event.
|
||||
/// It's required because one event can meet several conditions.
|
||||
/// For example: for events 'start', 'a', 'b', 'a', 'end'.
|
||||
/// The second occurrence of 'a' should be counted only once in one sequence.
|
||||
/// However, we do not know in advance if the next event will be 'b' or 'end', so we try to keep both paths.
|
||||
std::vector<std::list<EventMatchTimeWindow>> event_sequences(events_size);
|
||||
|
||||
bool has_first_event = false;
|
||||
for (size_t i = 0; i < events_list.size(); ++i)
|
||||
{
|
||||
const auto & current_event = events_list[i];
|
||||
auto timestamp = current_event.timestamp;
|
||||
Int64 event_idx = current_event.event_type - 1;
|
||||
UInt64 unique_id = current_event.unique_id;
|
||||
|
||||
if (strict_order && event_idx == -1)
|
||||
{
|
||||
if (has_first_event)
|
||||
break;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
else if (event_idx == 0)
|
||||
{
|
||||
auto & event_seq = event_sequences[0].emplace_back(timestamp, timestamp);
|
||||
event_seq.event_path[0] = unique_id;
|
||||
has_first_event = true;
|
||||
}
|
||||
else if (strict_deduplication && !event_sequences[event_idx].empty())
|
||||
{
|
||||
return events_list[i - 1].event_type;
|
||||
}
|
||||
else if (strict_order && has_first_event && event_sequences[event_idx - 1].empty())
|
||||
{
|
||||
for (size_t event = 0; event < event_sequences.size(); ++event)
|
||||
{
|
||||
if (event_sequences[event].empty())
|
||||
return event;
|
||||
}
|
||||
}
|
||||
else if (!event_sequences[event_idx - 1].empty())
|
||||
{
|
||||
auto & prev_level = event_sequences[event_idx - 1];
|
||||
for (auto it = prev_level.begin(); it != prev_level.end();)
|
||||
{
|
||||
auto first_ts = it->first_timestamp;
|
||||
bool time_matched = timestamp <= first_ts + window;
|
||||
if (!time_matched && prev_level.size() > 1)
|
||||
{
|
||||
// Remove old events that are out of the window, but keep at least one
|
||||
it = prev_level.erase(it);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto prev_path = it->event_path;
|
||||
chassert(event_idx > 0);
|
||||
|
||||
/// Ensure the unique_id hasn't been used in the path already
|
||||
for (size_t j = 0; j < static_cast<size_t>(event_idx); ++j)
|
||||
{
|
||||
if (!time_matched)
|
||||
break;
|
||||
time_matched = prev_path[j] != unique_id;
|
||||
}
|
||||
|
||||
if (time_matched && strict_increase)
|
||||
time_matched = it->last_timestamp < timestamp;
|
||||
|
||||
if (time_matched)
|
||||
{
|
||||
prev_path[event_idx] = unique_id;
|
||||
|
||||
auto & new_seq = event_sequences[event_idx].emplace_back(first_ts, timestamp);
|
||||
new_seq.event_path = std::move(prev_path);
|
||||
if (event_idx + 1 == events_size)
|
||||
return events_size;
|
||||
}
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t event = event_sequences.size(); event > 0; --event)
|
||||
{
|
||||
if (!event_sequences[event - 1].empty())
|
||||
return event;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
UInt8 getEventLevel(Data & data) const
|
||||
{
|
||||
if (data.size() == 0)
|
||||
return 0;
|
||||
if (!strict_order && events_size == 1)
|
||||
return 1;
|
||||
|
||||
data.sort();
|
||||
|
||||
if constexpr (Data::strict_once_enabled)
|
||||
return getEventLevelStrictOnce(data.events_list);
|
||||
else
|
||||
return getEventLevelNonStrictOnce(data.events_list);
|
||||
}
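The dispatch above is resolved entirely at compile time: each data type carries a static `strict_once_enabled` flag and `if constexpr` discards the branch that does not apply. A minimal standalone example of the same technique, with illustrative type names:

```cpp
#include <iostream>

struct DefaultData    { static constexpr bool strict_once_enabled = false; };
struct StrictOnceData { static constexpr bool strict_once_enabled = true;  };

template <typename Data>
const char * pickAlgorithm()
{
    /// The branch not taken is never instantiated for this Data type.
    if constexpr (Data::strict_once_enabled)
        return "strict-once path";
    else
        return "default path";
}

int main()
{
    std::cout << pickAlgorithm<DefaultData>() << '\n';    /// default path
    std::cout << pickAlgorithm<StrictOnceData>() << '\n'; /// strict-once path
    return 0;
}
```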
|
||||
|
||||
public:
|
||||
String getName() const override
|
||||
{
|
||||
@ -246,6 +492,9 @@ public:
|
||||
strict_order = true;
|
||||
else if (option == "strict_increase")
|
||||
strict_increase = true;
|
||||
else if (option == "strict_once")
|
||||
/// Checked in factory
|
||||
chassert(Data::strict_once_enabled);
|
||||
else if (option == "strict")
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "strict is replaced with strict_deduplication in Aggregate function {}", getName());
|
||||
else
|
||||
@ -272,6 +521,9 @@ public:
|
||||
|
||||
if (strict_order && !has_event)
|
||||
this->data(place).add(timestamp, 0);
|
||||
|
||||
if constexpr (Data::strict_once_enabled)
|
||||
this->data(place).advanceId();
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
@ -296,7 +548,6 @@ public:
|
||||
};
|
||||
|
||||
|
||||
template <template <typename> class Data>
|
||||
AggregateFunctionPtr
|
||||
createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes & arguments, const Array & params, const Settings *)
|
||||
{
|
||||
@ -309,7 +560,7 @@ createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes &
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Aggregate function {} requires one timestamp argument and at least one event condition.", name);
|
||||
|
||||
if (arguments.size() > max_events + 1)
|
||||
if (arguments.size() > MAX_EVENTS + 1)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many event arguments for aggregate function {}", name);
|
||||
|
||||
for (const auto i : collections::range(1, arguments.size()))
|
||||
@ -321,16 +572,29 @@ createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes &
|
||||
cond_arg->getName(), toString(i + 1), name);
|
||||
}
|
||||
|
||||
AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionWindowFunnel, Data>(*arguments[0], arguments, params));
|
||||
WhichDataType which(arguments.front().get());
|
||||
if (res)
|
||||
return res;
|
||||
if (which.isDate())
|
||||
return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDate::FieldType, Data<DataTypeDate::FieldType>>>(arguments, params);
|
||||
if (which.isDateTime())
|
||||
return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDateTime::FieldType, Data<DataTypeDateTime::FieldType>>>(
|
||||
arguments, params);
|
||||
|
||||
bool strict_once = params.size() > 1 && std::any_of(params.begin() + 1, params.end(), [](const auto & f) { return f.template safeGet<String>() == "strict_once"; });
|
||||
if (strict_once)
|
||||
{
|
||||
AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionWindowFunnel, AggregateFunctionWindowFunnelStrictOnceData>(*arguments[0], arguments, params));
|
||||
WhichDataType which(arguments.front().get());
|
||||
if (res)
|
||||
return res;
|
||||
if (which.isDate())
|
||||
return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDate::FieldType, AggregateFunctionWindowFunnelStrictOnceData<DataTypeDate::FieldType>>>(arguments, params);
|
||||
if (which.isDateTime())
|
||||
return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDateTime::FieldType, AggregateFunctionWindowFunnelStrictOnceData<DataTypeDateTime::FieldType>>>(arguments, params);
|
||||
}
|
||||
else
|
||||
{
|
||||
AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionWindowFunnel, AggregateFunctionWindowFunnelData>(*arguments[0], arguments, params));
|
||||
WhichDataType which(arguments.front().get());
|
||||
if (res)
|
||||
return res;
|
||||
if (which.isDate())
|
||||
return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDate::FieldType, AggregateFunctionWindowFunnelData<DataTypeDate::FieldType>>>(arguments, params);
|
||||
if (which.isDateTime())
|
||||
return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDateTime::FieldType, AggregateFunctionWindowFunnelData<DataTypeDateTime::FieldType>>>(arguments, params);
|
||||
}
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of first argument of aggregate function {}, must "
|
||||
"be Unsigned Number, Date, DateTime", arguments.front().get()->getName(), name);
|
||||
@ -340,7 +604,7 @@ createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes &
|
||||
|
||||
void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("windowFunnel", createAggregateFunctionWindowFunnel<AggregateFunctionWindowFunnelData>);
|
||||
factory.registerFunction("windowFunnel", createAggregateFunctionWindowFunnel);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -78,11 +78,6 @@ struct WindowFunction : public IAggregateFunctionHelper<WindowFunction>, public
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
void create(AggregateDataPtr __restrict) const override { }
|
||||
void destroy(AggregateDataPtr __restrict) const noexcept override { }
|
||||
bool hasTrivialDestructor() const override { return true; }
|
||||
size_t sizeOfData() const override { return 0; }
|
||||
size_t alignOfData() const override { return 1; }
|
||||
void add(AggregateDataPtr __restrict, const IColumn **, size_t, Arena *) const override { fail(); }
|
||||
void merge(AggregateDataPtr __restrict, ConstAggregateDataPtr, Arena *) const override { fail(); }
|
||||
void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional<size_t>) const override { fail(); }
|
||||
@ -90,6 +85,22 @@ struct WindowFunction : public IAggregateFunctionHelper<WindowFunction>, public
|
||||
void insertResultInto(AggregateDataPtr __restrict, IColumn &, Arena *) const override { fail(); }
|
||||
};
|
||||
|
||||
struct StatelessWindowFunction : public WindowFunction
|
||||
{
|
||||
StatelessWindowFunction(
|
||||
const std::string & name_, const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_)
|
||||
: WindowFunction(name_, argument_types_, parameters_, result_type_)
|
||||
{
|
||||
}
|
||||
|
||||
size_t sizeOfData() const override { return 0; }
|
||||
size_t alignOfData() const override { return 1; }
|
||||
|
||||
void create(AggregateDataPtr __restrict) const override { }
|
||||
void destroy(AggregateDataPtr __restrict) const noexcept override { }
|
||||
bool hasTrivialDestructor() const override { return true; }
|
||||
};
|
||||
|
||||
template <typename State>
|
||||
struct StatefulWindowFunction : public WindowFunction
|
||||
{
|
||||
@ -100,7 +111,7 @@ struct StatefulWindowFunction : public WindowFunction
|
||||
}
|
||||
|
||||
size_t sizeOfData() const override { return sizeof(State); }
|
||||
size_t alignOfData() const override { return 1; }
|
||||
size_t alignOfData() const override { return alignof(State); }
|
||||
|
||||
void create(AggregateDataPtr __restrict place) const override { new (place) State(); }
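The change from `return 1;` to `return alignof(State)` matters because the aggregate-data arena constructs the state with placement `new`, and the storage it hands out must satisfy the state's alignment requirement. A standalone sketch with an illustrative `ExampleState` (not a ClickHouse type):

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <new>

struct ExampleState
{
    double accumulator = 0;    /// typically requires 8-byte alignment
    std::uint64_t count = 0;
};

int main()
{
    std::cout << "sizeof(ExampleState)  = " << sizeof(ExampleState) << '\n';
    std::cout << "alignof(ExampleState) = " << alignof(ExampleState) << '\n';

    /// Correct: the storage is aligned for the state type, as alignOfData() now guarantees.
    alignas(ExampleState) std::byte storage[sizeof(ExampleState)];
    auto * state = new (storage) ExampleState();
    state->count = 1;
    state->~ExampleState();
    return 0;
}
```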
|
||||
|
||||
|
@ -34,7 +34,7 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
void exctractJoinConditions(const QueryTreeNodePtr & node, QueryTreeNodes & equi_conditions, QueryTreeNodes & other)
|
||||
void extractJoinConditions(const QueryTreeNodePtr & node, QueryTreeNodes & equi_conditions, QueryTreeNodes & other)
|
||||
{
|
||||
auto * func = node->as<FunctionNode>();
|
||||
if (!func)
|
||||
@ -52,7 +52,7 @@ void exctractJoinConditions(const QueryTreeNodePtr & node, QueryTreeNodes & equi
|
||||
else if (func->getFunctionName() == "and")
|
||||
{
|
||||
for (const auto & arg : args)
|
||||
exctractJoinConditions(arg, equi_conditions, other);
|
||||
extractJoinConditions(arg, equi_conditions, other);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -118,7 +118,7 @@ public:
|
||||
|
||||
QueryTreeNodes equi_conditions;
|
||||
QueryTreeNodes other_conditions;
|
||||
exctractJoinConditions(where_condition, equi_conditions, other_conditions);
|
||||
extractJoinConditions(where_condition, equi_conditions, other_conditions);
|
||||
bool can_convert_cross_to_inner = false;
|
||||
for (auto & condition : equi_conditions)
|
||||
{
|
||||
|
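Besides the typo fix (`exctract` to `extract`), the pass itself recursively flattens an AND tree of predicates into equality conditions and a remainder. A standalone model of that traversal over a simplified node type (not the real query-tree classes):

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Node
{
    std::string function;                         /// "and", "equals", or anything else
    std::vector<std::shared_ptr<Node>> args;
};

void extractJoinConditions(const std::shared_ptr<Node> & node,
                           std::vector<std::shared_ptr<Node>> & equi_conditions,
                           std::vector<std::shared_ptr<Node>> & other)
{
    if (node->function == "equals")
        equi_conditions.push_back(node);
    else if (node->function == "and")
    {
        /// Recurse into conjuncts so nested AND chains are flattened.
        for (const auto & arg : node->args)
            extractJoinConditions(arg, equi_conditions, other);
    }
    else
        other.push_back(node);
}

int main()
{
    auto eq = std::make_shared<Node>(Node{"equals", {}});
    auto gt = std::make_shared<Node>(Node{"greater", {}});
    auto root = std::make_shared<Node>(Node{"and", {eq, gt}});

    std::vector<std::shared_ptr<Node>> equi, other;
    extractJoinConditions(root, equi, other);
    std::cout << equi.size() << " equality condition(s), " << other.size() << " other\n"; /// 1, 1
    return 0;
}
```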
@ -432,6 +432,14 @@ QueryTreeNodePtr IdentifierResolver::tryResolveTableIdentifierFromDatabaseCatalo
|
||||
else
|
||||
storage = DatabaseCatalog::instance().tryGetTable(storage_id, context);
|
||||
|
||||
if (!storage && storage_id.hasUUID())
|
||||
{
|
||||
// If `storage_id` has UUID, it is possible that the UUID is removed from `DatabaseCatalog` after `context->resolveStorageID(storage_id)`
|
||||
// We try to get the table with the database name and the table name.
|
||||
auto database = DatabaseCatalog::instance().tryGetDatabase(storage_id.getDatabaseName());
|
||||
if (database)
|
||||
storage = database->tryGetTable(table_name, context);
|
||||
}
|
||||
if (!storage)
|
||||
return {};
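The new fallback handles a storage UUID that disappears from the catalog between resolution steps: if the UUID lookup fails, the table is re-resolved by database and table name. A simplified standalone model, with maps standing in for the catalog and strings standing in for UUIDs:

```cpp
#include <iostream>
#include <map>
#include <optional>
#include <string>

using UUID = std::string;   /// stand-in for the real UUID type

struct Catalog
{
    std::map<UUID, std::string> by_uuid;          /// uuid -> table name
    std::map<std::string, std::string> by_name;   /// "db.table" -> table name

    std::optional<std::string> tryGetByUUID(const UUID & id) const
    {
        auto it = by_uuid.find(id);
        if (it == by_uuid.end())
            return std::nullopt;
        return it->second;
    }

    std::optional<std::string> tryGetByName(const std::string & qualified) const
    {
        auto it = by_name.find(qualified);
        if (it == by_name.end())
            return std::nullopt;
        return it->second;
    }
};

std::optional<std::string> resolveTable(const Catalog & catalog, const UUID & uuid, const std::string & qualified_name)
{
    if (auto table = catalog.tryGetByUUID(uuid))
        return table;
    /// The UUID may have been removed concurrently; fall back to the name-based lookup.
    return catalog.tryGetByName(qualified_name);
}

int main()
{
    Catalog catalog;
    catalog.by_name["db.t"] = "t";
    std::cout << resolveTable(catalog, "stale-uuid", "db.t").value_or("<missing>") << '\n'; /// prints "t"
    return 0;
}
```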
|
||||
|
||||
|
@ -15,7 +15,7 @@ namespace DB
|
||||
{
|
||||
struct ConnectionParameters
|
||||
{
|
||||
std::string host;
|
||||
String host;
|
||||
UInt16 port{};
|
||||
std::string default_database;
|
||||
std::string user;
|
||||
@ -30,8 +30,8 @@ struct ConnectionParameters
|
||||
ConnectionTimeouts timeouts;
|
||||
|
||||
ConnectionParameters() = default;
|
||||
ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host);
|
||||
ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, std::optional<UInt16> port);
|
||||
ConnectionParameters(const Poco::Util::AbstractConfiguration & config, String host);
|
||||
ConnectionParameters(const Poco::Util::AbstractConfiguration & config, String host, std::optional<UInt16> port);
|
||||
|
||||
static UInt16 getPortFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & connection_host);
|
||||
|
||||
|
@ -291,9 +291,14 @@
|
||||
M(CacheWarmerBytesInProgress, "Total size of remote file segments waiting to be asynchronously loaded into filesystem cache.") \
|
||||
M(DistrCacheOpenedConnections, "Number of open connections to Distributed Cache") \
|
||||
M(DistrCacheUsedConnections, "Number of currently used connections to Distributed Cache") \
|
||||
M(DistrCacheAllocatedConnections, "Number of currently allocated connections to Distributed Cache connection pool") \
|
||||
M(DistrCacheBorrowedConnections, "Number of currently borrowed connections to Distributed Cache connection pool") \
|
||||
M(DistrCacheReadRequests, "Number of executed Read requests to Distributed Cache") \
|
||||
M(DistrCacheWriteRequests, "Number of executed Write requests to Distributed Cache") \
|
||||
M(DistrCacheServerConnections, "Number of open connections to ClickHouse server from Distributed Cache") \
|
||||
M(DistrCacheRegisteredServers, "Number of distributed cache registered servers") \
|
||||
M(DistrCacheRegisteredServersCurrentAZ, "Number of distributed cache registered servers in current az") \
|
||||
M(DistrCacheServerS3CachedClients, "Number of distributed cache S3 cached clients") \
|
||||
\
|
||||
M(SchedulerIOReadScheduled, "Number of IO reads are being scheduled currently") \
|
||||
M(SchedulerIOWriteScheduled, "Number of IO writes are being scheduled currently") \
|
||||
@ -314,6 +319,20 @@
|
||||
M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \
|
||||
\
|
||||
M(DiskS3NoSuchKeyErrors, "The number of `NoSuchKey` errors that occur when reading data from S3 cloud storage through ClickHouse disks.") \
|
||||
\
|
||||
M(SharedCatalogStateApplicationThreads, "Number of threads in the threadpool for state application in Shared Catalog.") \
|
||||
M(SharedCatalogStateApplicationThreadsActive, "Number of active threads in the threadpool for state application in Shared Catalog.") \
|
||||
M(SharedCatalogStateApplicationThreadsScheduled, "Number of queued or active jobs in the threadpool for state application in Shared Catalog.") \
|
||||
\
|
||||
M(SharedCatalogDropLocalThreads, "Number of threads in the threadpool for drop of local tables in Shared Catalog.") \
|
||||
M(SharedCatalogDropLocalThreadsActive, "Number of active threads in the threadpool for drop of local tables in Shared Catalog.") \
|
||||
M(SharedCatalogDropLocalThreadsScheduled, "Number of queued or active jobs in the threadpool for drop of local tables in Shared Catalog.") \
|
||||
\
|
||||
M(SharedCatalogDropZooKeeperThreads, "Number of threads in the threadpool for drop of object in ZooKeeper in Shared Catalog.") \
|
||||
M(SharedCatalogDropZooKeeperThreadsActive, "Number of active threads in the threadpool for drop of object in ZooKeeper in Shared Catalog.") \
|
||||
M(SharedCatalogDropZooKeeperThreadsScheduled, "Number of queued or active jobs in the threadpool for drop of object in ZooKeeper in Shared Catalog.") \
|
||||
\
|
||||
M(SharedDatabaseCatalogTablesInLocalDropDetachQueue, "Number of tables in the queue for local drop or detach in Shared Catalog.") \
|
||||
|
||||
#ifdef APPLY_FOR_EXTERNAL_METRICS
|
||||
#define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M)
|
||||
|
@ -452,7 +452,7 @@
|
||||
M(553, LZMA_STREAM_ENCODER_FAILED) \
|
||||
M(554, LZMA_STREAM_DECODER_FAILED) \
|
||||
M(555, ROCKSDB_ERROR) \
|
||||
M(556, SYNC_MYSQL_USER_ACCESS_ERROR)\
|
||||
M(556, SYNC_MYSQL_USER_ACCESS_ERROR) \
|
||||
M(557, UNKNOWN_UNION) \
|
||||
M(558, EXPECTED_ALL_OR_DISTINCT) \
|
||||
M(559, INVALID_GRPC_QUERY_INFO) \
|
||||
@ -578,7 +578,7 @@
|
||||
M(697, CANNOT_RESTORE_TO_NONENCRYPTED_DISK) \
|
||||
M(698, INVALID_REDIS_STORAGE_TYPE) \
|
||||
M(699, INVALID_REDIS_TABLE_STRUCTURE) \
|
||||
M(700, USER_SESSION_LIMIT_EXCEEDED) \
|
||||
M(700, USER_SESSION_LIMIT_EXCEEDED) \
|
||||
M(701, CLUSTER_DOESNT_EXIST) \
|
||||
M(702, CLIENT_INFO_DOES_NOT_MATCH) \
|
||||
M(703, INVALID_IDENTIFIER) \
|
||||
@ -610,15 +610,17 @@
|
||||
M(729, ILLEGAL_TIME_SERIES_TAGS) \
|
||||
M(730, REFRESH_FAILED) \
|
||||
M(731, QUERY_CACHE_USED_WITH_NON_THROW_OVERFLOW_MODE) \
|
||||
\
|
||||
M(733, TABLE_IS_BEING_RESTARTED) \
|
||||
\
|
||||
M(900, DISTRIBUTED_CACHE_ERROR) \
|
||||
M(901, CANNOT_USE_DISTRIBUTED_CACHE) \
|
||||
\
|
||||
M(902, PROTOCOL_VERSION_MISMATCH) \
|
||||
\
|
||||
M(999, KEEPER_EXCEPTION) \
|
||||
M(1000, POCO_EXCEPTION) \
|
||||
M(1001, STD_EXCEPTION) \
|
||||
M(1002, UNKNOWN_EXCEPTION) \
|
||||
/* See END */
|
||||
/* See END */
|
||||
|
||||
#ifdef APPLY_FOR_EXTERNAL_ERROR_CODES
|
||||
#define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) APPLY_FOR_EXTERNAL_ERROR_CODES(M)
|
||||
|
@ -49,11 +49,21 @@ static struct InitFiu
|
||||
ONCE(smt_commit_write_zk_fail_before_op) \
|
||||
ONCE(smt_commit_merge_change_version_before_op) \
|
||||
ONCE(smt_merge_mutate_intention_freeze_in_destructor) \
|
||||
ONCE(smt_add_part_sleep_after_add_before_commit) \
|
||||
ONCE(smt_sleep_in_constructor) \
|
||||
ONCE(meta_in_keeper_create_metadata_failure) \
|
||||
ONCE(smt_insert_retry_timeout) \
|
||||
ONCE(smt_insert_fake_hardware_error) \
|
||||
ONCE(smt_sleep_after_hardware_in_insert) \
|
||||
ONCE(smt_throw_keeper_exception_after_successful_insert) \
|
||||
REGULAR(smt_dont_merge_first_part) \
|
||||
REGULAR(smt_sleep_in_schedule_data_processing_job) \
|
||||
REGULAR(cache_warmer_stall) \
|
||||
REGULAR(check_table_query_delay_for_part) \
|
||||
REGULAR(dummy_failpoint) \
|
||||
REGULAR(prefetched_reader_pool_failpoint) \
|
||||
REGULAR(shared_set_sleep_during_update) \
|
||||
REGULAR(smt_outdated_parts_exception_response) \
|
||||
PAUSEABLE_ONCE(replicated_merge_tree_insert_retry_pause) \
|
||||
PAUSEABLE_ONCE(finish_set_quorum_failed_parts) \
|
||||
PAUSEABLE_ONCE(finish_clean_quorum_failed_parts) \
|
||||
|
@ -241,6 +241,8 @@
|
||||
M(MergeVerticalStageExecuteMilliseconds, "Total busy time spent for execution of vertical stage of background merges", ValueType::Milliseconds) \
|
||||
M(MergeProjectionStageTotalMilliseconds, "Total time spent for projection stage of background merges", ValueType::Milliseconds) \
|
||||
M(MergeProjectionStageExecuteMilliseconds, "Total busy time spent for execution of projection stage of background merges", ValueType::Milliseconds) \
|
||||
M(MergePrewarmStageTotalMilliseconds, "Total time spent for prewarm stage of background merges", ValueType::Milliseconds) \
|
||||
M(MergePrewarmStageExecuteMilliseconds, "Total busy time spent for execution of prewarm stage of background merges", ValueType::Milliseconds) \
|
||||
\
|
||||
M(MergingSortedMilliseconds, "Total time spent while merging sorted columns", ValueType::Milliseconds) \
|
||||
M(AggregatingSortedMilliseconds, "Total time spent while aggregating sorted columns", ValueType::Milliseconds) \
|
||||
@ -639,6 +641,8 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(MetadataFromKeeperBackgroundCleanupTransactions, "Number of times old transaction idempotency token was cleaned up by background task", ValueType::Number) \
|
||||
M(MetadataFromKeeperBackgroundCleanupErrors, "Number of times an error was encountered in background cleanup task", ValueType::Number) \
|
||||
\
|
||||
M(SharedMergeTreeMetadataCacheHintLoadedFromCache, "Number of times metadata cache hint was found without going to Keeper", ValueType::Number) \
|
||||
\
|
||||
M(KafkaRebalanceRevocations, "Number of partition revocations (the first stage of consumer group rebalance)", ValueType::Number) \
|
||||
M(KafkaRebalanceAssignments, "Number of partition assignments (the final stage of consumer group rebalance)", ValueType::Number) \
|
||||
M(KafkaRebalanceErrors, "Number of failed consumer group rebalances", ValueType::Number) \
|
||||
@ -742,29 +746,51 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(ConnectionPoolIsFullMicroseconds, "Total time spent waiting for a slot in connection pool.", ValueType::Microseconds) \
|
||||
M(AsyncLoaderWaitMicroseconds, "Total time a query was waiting for async loader jobs.", ValueType::Microseconds) \
|
||||
\
|
||||
M(DistrCacheServerSwitches, "Number of server switches between distributed cache servers in read/write-through cache", ValueType::Number) \
|
||||
M(DistrCacheReadMicroseconds, "Time spent reading from distributed cache", ValueType::Microseconds) \
|
||||
M(DistrCacheFallbackReadMicroseconds, "Time spend reading from fallback buffer instead of distribted cache", ValueType::Microseconds) \
|
||||
M(DistrCachePrecomputeRangesMicroseconds, "Time spent to precompute read ranges", ValueType::Microseconds) \
|
||||
M(DistrCacheNextImplMicroseconds, "Time spend in ReadBufferFromDistributedCache::nextImpl", ValueType::Microseconds) \
|
||||
M(DistrCacheOpenedConnections, "The number of open connections to distributed cache", ValueType::Number) \
|
||||
M(DistrCacheReusedConnections, "The number of reused connections to distributed cache", ValueType::Number) \
|
||||
M(DistrCacheHoldConnections, "The number of used connections to distributed cache", ValueType::Number) \
|
||||
M(DistrCacheServerSwitches, "Distributed Cache read buffer event. Number of server switches between distributed cache servers in read/write-through cache", ValueType::Number) \
|
||||
M(DistrCacheReadMicroseconds, "Distributed Cache read buffer event. Time spent reading from distributed cache", ValueType::Microseconds) \
|
||||
M(DistrCacheFallbackReadMicroseconds, "Distributed Cache read buffer event. Time spent reading from fallback buffer instead of distributed cache", ValueType::Microseconds) \
|
||||
M(DistrCachePrecomputeRangesMicroseconds, "Distributed Cache read buffer event. Time spent to precompute read ranges", ValueType::Microseconds) \
|
||||
M(DistrCacheNextImplMicroseconds, "Distributed Cache read buffer event. Time spent in ReadBufferFromDistributedCache::nextImpl", ValueType::Microseconds) \
|
||||
M(DistrCacheStartRangeMicroseconds, "Distributed Cache read buffer event. Time spent to start a new read range with distributed cache", ValueType::Microseconds) \
|
||||
M(DistrCacheIgnoredBytesWhileWaitingProfileEvents, "Distributed Cache read buffer event. Ignored bytes while waiting for profile events in distributed cache", ValueType::Number) \
|
||||
M(DistrCacheRangeChange, "Distributed Cache read buffer event. Number of times we changed read range because of seek/last_position change", ValueType::Number) \
|
||||
\
|
||||
M(DistrCacheGetResponseMicroseconds, "Time spend to wait for response from distributed cache", ValueType::Microseconds) \
|
||||
M(DistrCacheStartRangeMicroseconds, "Time spent to start a new read range with distributed cache", ValueType::Microseconds) \
|
||||
M(DistrCacheLockRegistryMicroseconds, "Time spent to take DistributedCacheRegistry lock", ValueType::Microseconds) \
|
||||
M(DistrCacheUnusedPackets, "Number of skipped unused packets from distributed cache", ValueType::Number) \
|
||||
M(DistrCachePackets, "Total number of packets received from distributed cache", ValueType::Number) \
|
||||
M(DistrCacheUnusedPacketsBytes, "The number of bytes in Data packets which were ignored", ValueType::Bytes) \
|
||||
M(DistrCacheRegistryUpdateMicroseconds, "Time spent updating distributed cache registry", ValueType::Microseconds) \
|
||||
M(DistrCacheRegistryUpdates, "Number of distributed cache registry updates", ValueType::Number) \
|
||||
M(DistrCacheGetResponseMicroseconds, "Distributed Cache client event. Time spent waiting for a response from distributed cache", ValueType::Microseconds) \
|
||||
M(DistrCacheReadErrors, "Distributed Cache client event. Number of distributed cache errors during read", ValueType::Number) \
|
||||
M(DistrCacheMakeRequestErrors, "Distributed Cache client event. Number of distributed cache errors when making a request", ValueType::Number) \
|
||||
M(DistrCacheReceiveResponseErrors, "Distributed Cache client event. Number of distributed cache errors when receiving a response to a request", ValueType::Number) \
|
||||
\
|
||||
M(DistrCacheConnectMicroseconds, "The time spent to connect to distributed cache", ValueType::Microseconds) \
|
||||
M(DistrCacheConnectAttempts, "The number of connection attempts to distributed cache", ValueType::Number) \
|
||||
M(DistrCacheGetClient, "Number of client access times", ValueType::Number) \
|
||||
M(DistrCachePackets, "Distributed Cache client event. Total number of packets received from distributed cache", ValueType::Number) \
|
||||
M(DistrCachePacketsBytes, "Distributed Cache client event. The number of bytes in Data packets which were not ignored", ValueType::Bytes) \
|
||||
M(DistrCacheUnusedPackets, "Distributed Cache client event. Number of skipped unused packets from distributed cache", ValueType::Number) \
|
||||
M(DistrCacheUnusedPacketsBytes, "Distributed Cache client event. The number of bytes in Data packets which were ignored", ValueType::Bytes) \
|
||||
M(DistrCacheUnusedPacketsBufferAllocations, "Distributed Cache client event. The number of extra buffer allocations in case we could not reuse existing buffer", ValueType::Number) \
|
||||
\
|
||||
M(DistrCacheServerProcessRequestMicroseconds, "Time spent processing request on DistributedCache server side", ValueType::Microseconds) \
|
||||
M(DistrCacheLockRegistryMicroseconds, "Distributed Cache registry event. Time spent to take DistributedCacheRegistry lock", ValueType::Microseconds) \
|
||||
M(DistrCacheRegistryUpdateMicroseconds, "Distributed Cache registry event. Time spent updating distributed cache registry", ValueType::Microseconds) \
|
||||
M(DistrCacheRegistryUpdates, "Distributed Cache registry event. Number of distributed cache registry updates", ValueType::Number) \
|
||||
M(DistrCacheHashRingRebuilds, "Distributed Cache registry event. Number of distributed cache hash ring rebuilds", ValueType::Number) \
|
||||
\
|
||||
M(DistrCacheReadBytesFromCache, "Distributed Cache read buffer event. Bytes read from distributed cache", ValueType::Bytes) \
|
||||
M(DistrCacheReadBytesFromFallbackBuffer, "Distributed Cache read buffer event. Bytes read from fallback buffer", ValueType::Number) \
|
||||
\
|
||||
M(DistrCacheRangeResetBackward, "Distributed Cache read buffer event. Number of times we reset read range because of seek/last_position change", ValueType::Number) \
|
||||
M(DistrCacheRangeResetForward, "Distributed Cache read buffer event. Number of times we reset read range because of seek/last_position change", ValueType::Number) \
|
||||
\
|
||||
M(DistrCacheOpenedConnections, "Distributed Cache connection event. The number of open connections to distributed cache", ValueType::Number) \
|
||||
M(DistrCacheReusedConnections, "Distributed Cache connection event. The number of reused connections to distributed cache", ValueType::Number) \
|
||||
M(DistrCacheOpenedConnectionsBypassingPool, "Distributed Cache connection event. The number of open connections to distributed cache bypassing pool", ValueType::Number) \
|
||||
M(DistrCacheConnectMicroseconds, "Distributed Cache connection event. The time spent to connect to distributed cache", ValueType::Microseconds) \
|
||||
M(DistrCacheConnectAttempts, "Distributed Cache connection event. The number of connection attempts to distributed cache", ValueType::Number) \
|
||||
M(DistrCacheGetClientMicroseconds, "Distributed Cache connection event. Time spent getting client for distributed cache", ValueType::Microseconds) \
|
||||
\
|
||||
M(DistrCacheServerProcessRequestMicroseconds, "Distributed Cache server event. Time spent processing request on DistributedCache server side", ValueType::Microseconds) \
|
||||
M(DistrCacheServerStartRequestPackets, "Distributed Cache server event. Number of StartRequest packets in DistributedCacheServer", ValueType::Number) \
|
||||
M(DistrCacheServerContinueRequestPackets, "Distributed Cache server event. Number of ContinueRequest packets in DistributedCacheServer", ValueType::Number) \
|
||||
M(DistrCacheServerEndRequestPackets, "Distributed Cache server event. Number of EndRequest packets in DistributedCacheServer", ValueType::Number) \
|
||||
M(DistrCacheServerAckRequestPackets, "Distributed Cache server event. Number of AckRequest packets in DistributedCacheServer", ValueType::Number) \
|
||||
M(DistrCacheServerNewS3CachedClients, "Distributed Cache server event. The number of new cached s3 clients", ValueType::Number) \
|
||||
M(DistrCacheServerReusedS3CachedClients, "Distributed Cache server event. The number of reused cached s3 clients", ValueType::Number) \
|
||||
\
|
||||
M(LogTest, "Number of log messages with level Test", ValueType::Number) \
|
||||
M(LogTrace, "Number of log messages with level Trace", ValueType::Number) \
|
||||
@ -788,15 +814,38 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(InterfacePostgreSQLReceiveBytes, "Number of bytes received through PostgreSQL interfaces", ValueType::Bytes) \
|
||||
\
|
||||
M(ParallelReplicasUsedCount, "Number of replicas used to execute a query with task-based parallel replicas", ValueType::Number) \
|
||||
M(ParallelReplicasAvailableCount, "Number of replicas available to execute a query with task-based parallel replicas", ValueType::Number) \
|
||||
M(ParallelReplicasUnavailableCount, "Number of replicas which was chosen, but found to be unavailable during query execution with task-based parallel replicas", ValueType::Number) \
|
||||
\
|
||||
M(SharedMergeTreeVirtualPartsUpdates, "Virtual parts update count", ValueType::Number) \
|
||||
M(SharedMergeTreeVirtualPartsUpdatesByLeader, "Virtual parts updates by leader", ValueType::Number) \
|
||||
M(SharedMergeTreeVirtualPartsUpdateMicroseconds, "Virtual parts update microseconds", ValueType::Microseconds) \
|
||||
M(SharedMergeTreeVirtualPartsUpdatesFromZooKeeper, "Virtual parts updates count from ZooKeeper", ValueType::Number) \
|
||||
M(SharedMergeTreeVirtualPartsUpdatesFromZooKeeperMicroseconds, "Virtual parts updates from ZooKeeper microseconds", ValueType::Microseconds) \
|
||||
M(SharedMergeTreeVirtualPartsUpdatesPeerNotFound, "Virtual updates from peer failed because no one found", ValueType::Number) \
|
||||
M(SharedMergeTreeVirtualPartsUpdatesFromPeer, "Virtual parts updates count from peer", ValueType::Number) \
|
||||
M(SharedMergeTreeVirtualPartsUpdatesFromPeerMicroseconds, "Virtual parts updates from peer microseconds", ValueType::Microseconds) \
|
||||
M(SharedMergeTreeVirtualPartsUpdatesForMergesOrStatus, "Virtual parts updates from non-default background job", ValueType::Number) \
|
||||
M(SharedMergeTreeVirtualPartsUpdatesLeaderFailedElection, "Virtual parts updates leader election failed", ValueType::Number) \
|
||||
M(SharedMergeTreeVirtualPartsUpdatesLeaderSuccessfulElection, "Virtual parts updates leader election successful", ValueType::Number) \
|
||||
M(SharedMergeTreeMergeMutationAssignmentAttempt, "How many times we tried to assign merge or mutation", ValueType::Number) \
|
||||
M(SharedMergeTreeMergeMutationAssignmentFailedWithNothingToDo, "How many times we tried to assign merge or mutation and failed because nothing to merge", ValueType::Number) \
|
||||
M(SharedMergeTreeMergeMutationAssignmentFailedWithConflict, "How many times we tried to assign merge or mutation and failed because of conflict in Keeper", ValueType::Number) \
|
||||
M(SharedMergeTreeMergeMutationAssignmentSuccessful, "How many times we tried to assign merge or mutation", ValueType::Number) \
|
||||
M(SharedMergeTreeMergePartsMovedToOudated, "How many parts moved to outdated directory", ValueType::Number) \
|
||||
M(SharedMergeTreeMergePartsMovedToCondemned, "How many parts moved to condemned directory", ValueType::Number) \
|
||||
M(SharedMergeTreeOutdatedPartsConfirmationRequest, "How many ZooKeeper requests were used to confirm outdated parts", ValueType::Number) \
|
||||
M(SharedMergeTreeOutdatedPartsConfirmationInvocations, "How many invocations were made to confirm outdated parts", ValueType::Number) \
|
||||
M(SharedMergeTreeOutdatedPartsHTTPRequest, "How many HTTP requests were sent to confirm outdated parts", ValueType::Number) \
|
||||
M(SharedMergeTreeOutdatedPartsHTTPResponse, "How many HTTP responses were sent to confirm outdated parts", ValueType::Number) \
|
||||
M(SharedMergeTreeCondemnedPartsKillRequest, "How many ZooKeeper requests were used to remove condemned parts", ValueType::Number) \
|
||||
M(SharedMergeTreeCondemnedPartsLockConfict, "How many times we failed to acquire the lock because of a conflict", ValueType::Number) \
|
||||
M(SharedMergeTreeCondemnedPartsRemoved, "How many condemned parts were removed", ValueType::Number) \
|
||||
M(KeeperLogsEntryReadFromLatestCache, "Number of log entries in Keeper being read from latest logs cache", ValueType::Number) \
|
||||
M(KeeperLogsEntryReadFromCommitCache, "Number of log entries in Keeper being read from commit logs cache", ValueType::Number) \
|
||||
M(KeeperLogsEntryReadFromFile, "Number of log entries in Keeper being read directly from the changelog file", ValueType::Number) \
|
||||
M(KeeperLogsPrefetchedEntries, "Number of log entries in Keeper being prefetched from the changelog file", ValueType::Number) \
|
||||
\
M(ParallelReplicasAvailableCount, "Number of replicas available to execute a query with task-based parallel replicas", ValueType::Number) \
M(ParallelReplicasUnavailableCount, "Number of replicas which were chosen but found to be unavailable during query execution with task-based parallel replicas", ValueType::Number) \
\
M(StorageConnectionsCreated, "Number of created connections for storages", ValueType::Number) \
M(StorageConnectionsReused, "Number of reused connections for storages", ValueType::Number) \
M(StorageConnectionsReset, "Number of reset connections for storages", ValueType::Number) \
@ -828,6 +877,9 @@ The server successfully detected this situation and will download merged part fr
M(ReadWriteBufferFromHTTPRequestsSent, "Number of HTTP requests sent by ReadWriteBufferFromHTTP", ValueType::Number) \
M(ReadWriteBufferFromHTTPBytes, "Total size of payload bytes received and sent by ReadWriteBufferFromHTTP. Doesn't include HTTP headers.", ValueType::Bytes) \
\
M(SharedDatabaseCatalogFailedToApplyState, "Number of failures to apply new state in SharedDatabaseCatalog", ValueType::Number) \
M(SharedDatabaseCatalogStateApplicationMicroseconds, "Total time spent on application of new state in SharedDatabaseCatalog", ValueType::Microseconds) \
\
M(GWPAsanAllocateSuccess, "Number of successful allocations done by GWPAsan", ValueType::Number) \
M(GWPAsanAllocateFailed, "Number of failed allocations done by GWPAsan (i.e. filled pool)", ValueType::Number) \
M(GWPAsanFree, "Number of free operations done by GWPAsan", ValueType::Number) \
@ -38,6 +38,9 @@ namespace ProfileEvents
|
||||
};
|
||||
Timer(Counters & counters_, Event timer_event_, Resolution resolution_);
|
||||
Timer(Counters & counters_, Event timer_event_, Event counter_event, Resolution resolution_);
|
||||
Timer(Timer && other) noexcept
|
||||
: counters(other.counters), timer_event(std::move(other.timer_event)), watch(std::move(other.watch)), resolution(std::move(other.resolution))
|
||||
{}
|
||||
~Timer() { end(); }
|
||||
void cancel() { watch.reset(); }
|
||||
void restart() { watch.restart(); }
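For context, a minimal usage sketch (not part of this diff) of the ProfileEvents::Timer shown above: the constructor takes a Counters reference, a timer event and a resolution; the destructor calls end() to record the elapsed time, and cancel() drops the measurement. The names counters, MyQueryMicroseconds, doTimedWork and the Resolution::Microseconds enumerator are illustrative assumptions.

{
    // Sketch only: `counters` (ProfileEvents::Counters &) and `MyQueryMicroseconds`
    // (ProfileEvents::Event) are assumed to exist; Resolution::Microseconds is assumed
    // to be one of the enumerators of the Resolution enum closed by the `};` above.
    ProfileEvents::Timer timer(counters, MyQueryMicroseconds, ProfileEvents::Timer::Resolution::Microseconds);
    doTimedWork();          // hypothetical helper: the section being measured
    if (measurement_not_needed)
        timer.cancel();     // discard the measurement instead of recording it
}   // ~Timer() records the elapsed time into `counters` unless cancel() was called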
|
||||
|
@ -31,7 +31,7 @@ std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const
|
||||
endpoint.toString(),
|
||||
response.getStatus(),
|
||||
response.getReason(),
|
||||
"");
|
||||
/* body_length = */ 0);
|
||||
|
||||
std::string proxy_host;
|
||||
Poco::StreamCopier::copyToString(response_body_stream, proxy_host);
|
||||
|
@ -159,6 +159,8 @@ public:
|
||||
const std::string & getLastKeeperErrorMessage() const { return keeper_error.message; }
|
||||
|
||||
/// The action will be called only once, and only after the latest failed retry.
/// NOTE: it is called only when the retries finish with a Keeper exception;
/// if some other exception is thrown, this function will not be called.
void actionAfterLastFailedRetry(std::function<void()> f) { action_after_last_failed_retry = std::move(f); }
|
||||
|
||||
const std::string & getName() const { return name; }
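A hedged sketch of how the hook above might be wired up, using only the members visible in this hunk; the surrounding retries-control object (here called `retries`) and the logger are assumptions for illustration, not code from this diff.

// Sketch only: `retries` is assumed to be an instance of the retries-control class
// declared in this header, and `log` an existing logger.
retries.actionAfterLastFailedRetry([&retries, log]
{
    // Runs exactly once, and only if all retries were exhausted with a Keeper exception.
    LOG_WARNING(log, "Retries '{}' gave up: {}", retries.getName(), retries.getLastKeeperErrorMessage());
});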
|
||||
|
@ -25,15 +25,11 @@ namespace
|
||||
* `curl` strips the leading dot and accepts the URL gitlab.com as a match for no_proxy .gitlab.com,
* while `wget` does an exact match.
* */
std::string buildPocoRegexpEntryWithoutLeadingDot(const std::string & host)
|
||||
std::string buildPocoRegexpEntryWithoutLeadingDot(std::string_view host)
|
||||
{
|
||||
std::string_view view_without_leading_dot = host;
|
||||
if (host[0] == '.')
|
||||
{
|
||||
view_without_leading_dot = std::string_view {host.begin() + 1u, host.end()};
|
||||
}
|
||||
|
||||
return RE2::QuoteMeta(view_without_leading_dot);
|
||||
if (host.starts_with('.'))
|
||||
host.remove_prefix(1);
|
||||
return RE2::QuoteMeta(host);
|
||||
}
|
||||
|
||||
}
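A self-contained sketch (assuming re2 is available; the function and host names are illustrative) of the behaviour the helper above standardises on: a leading dot in a no_proxy entry is dropped before the host is turned into a literal regexp fragment with RE2::QuoteMeta.

#include <iostream>
#include <string>
#include <string_view>
#include <re2/re2.h>

// Mirrors the curl-style handling: ".gitlab.com" and "gitlab.com" both become "gitlab\.com".
static std::string entryWithoutLeadingDot(std::string_view host)
{
    if (host.starts_with('.'))
        host.remove_prefix(1);
    return RE2::QuoteMeta(std::string(host));   // copy to std::string for a portable conversion
}

int main()
{
    std::cout << entryWithoutLeadingDot(".gitlab.com") << '\n'; // prints: gitlab\.com
    std::cout << entryWithoutLeadingDot("gitlab.com") << '\n';  // prints: gitlab\.com
}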
|
||||
|
@ -170,6 +170,9 @@ Avoid reordering rows when reading from Parquet files. Usually makes it much slo
|
||||
)", 0) \
|
||||
M(Bool, input_format_parquet_filter_push_down, true, R"(
|
||||
When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata.
|
||||
)", 0) \
|
||||
M(Bool, input_format_parquet_bloom_filter_push_down, false, R"(
|
||||
When reading Parquet files, skip whole row groups based on the WHERE expressions and bloom filter in the Parquet metadata.
|
||||
)", 0) \
|
||||
M(Bool, input_format_parquet_use_native_reader, false, R"(
|
||||
When reading Parquet files, use the native reader instead of the Arrow reader.
@ -190,6 +193,9 @@ When reading ORC files, skip whole stripes or row groups based on the WHERE/PREW
|
||||
)", 0) \
|
||||
M(String, input_format_orc_reader_time_zone_name, "GMT", R"(
|
||||
The time zone name for the ORC row reader; the default ORC row reader's time zone is GMT.
)", 0) \
|
||||
M(Bool, input_format_orc_dictionary_as_low_cardinality, true, R"(
|
||||
Treat ORC dictionary encoded columns as LowCardinality columns while reading ORC files.
|
||||
)", 0) \
|
||||
M(Bool, input_format_parquet_allow_missing_columns, true, R"(
|
||||
Allow missing columns while reading Parquet input formats
|
||||
|
@ -2700,7 +2700,7 @@ The maximum read speed in bytes per second for particular backup on server. Zero
|
||||
Log query performance statistics into the query_log, query_thread_log and query_views_log.
|
||||
)", 0) \
|
||||
M(Bool, log_query_settings, true, R"(
|
||||
Log query settings into the query_log.
|
||||
Log query settings into the query_log and OpenTelemetry span log.
|
||||
)", 0) \
|
||||
M(Bool, log_query_threads, false, R"(
|
||||
Setting up query threads logging.
|
||||
@ -4812,6 +4812,9 @@ Max attempts to read with backoff
|
||||
)", 0) \
|
||||
M(Bool, enable_filesystem_cache, true, R"(
|
||||
Use cache for remote filesystem. This setting does not turn the cache on or off for disks (that must be done via the disk config), but allows bypassing the cache for some queries if intended
)", 0) \
|
||||
M(String, filesystem_cache_name, "", R"(
|
||||
Filesystem cache name to use for stateless table engines or data lakes
|
||||
)", 0) \
|
||||
M(Bool, enable_filesystem_cache_on_write_operations, false, R"(
|
||||
Write into cache on write operations. To actually work, this setting must also be enabled in the disk config
@ -5151,7 +5154,7 @@ SELECT * FROM test_table
|
||||
Rewrite count distinct to subquery of group by
|
||||
)", 0) \
|
||||
M(Bool, throw_if_no_data_to_insert, true, R"(
|
||||
Allows or forbids empty INSERTs, enabled by default (throws an error on an empty insert)
|
||||
Allows or forbids empty INSERTs, enabled by default (throws an error on an empty insert). Only applies to INSERTs using [`clickhouse-client`](/docs/en/interfaces/cli) or using the [gRPC interface](/docs/en/interfaces/grpc).
|
||||
)", 0) \
|
||||
M(Bool, compatibility_ignore_auto_increment_in_create_table, false, R"(
|
||||
Ignore AUTO_INCREMENT keyword in column declaration if true, otherwise return error. It simplifies migration from MySQL
|
||||
@ -5376,7 +5379,7 @@ Result:
|
||||
If enabled, the server will ignore all DROP table queries with the specified probability (for Memory and JOIN engines it will replace DROP with TRUNCATE). Used for testing purposes
)", 0) \
|
||||
M(Bool, traverse_shadow_remote_data_paths, false, R"(
|
||||
Traverse shadow directory when query system.remote_data_paths
|
||||
Traverse frozen data (shadow directory) in addition to actual table data when query system.remote_data_paths
|
||||
)", 0) \
|
||||
M(Bool, geo_distance_returns_float64_on_float64_arguments, true, R"(
|
||||
If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.
|
||||
@ -5498,8 +5501,8 @@ Replace external dictionary sources to Null on restore. Useful for testing purpo
|
||||
M(Bool, create_if_not_exists, false, R"(
|
||||
Enable `IF NOT EXISTS` for `CREATE` statement by default. If either this setting or `IF NOT EXISTS` is specified and a table with the provided name already exists, no exception will be thrown.
|
||||
)", 0) \
|
||||
M(Bool, enable_secure_identifiers, false, R"(
|
||||
If enabled, only allow secure identifiers which contain only underscore and alphanumeric characters
|
||||
M(Bool, enforce_strict_identifier_format, false, R"(
|
||||
If enabled, only allow identifiers containing alphanumeric characters and underscores.
|
||||
)", 0) \
|
||||
M(Bool, mongodb_throw_on_unsupported_query, true, R"(
|
||||
If enabled, MongoDB tables will return an error when a MongoDB query cannot be built. Otherwise, ClickHouse reads the full table and processes it locally. This option does not apply to the legacy implementation or when 'allow_experimental_analyzer=0'.
|
||||
@ -5785,9 +5788,6 @@ The heartbeat interval in seconds to indicate watch query is alive.
|
||||
Timeout for waiting for window view fire signal in event time processing
|
||||
)", 0) \
|
||||
\
|
||||
M(Bool, allow_experimental_refreshable_materialized_view, false, R"(
|
||||
Allow refreshable materialized views (CREATE MATERIALIZED VIEW \\<name\\> REFRESH ...).
|
||||
)", 0) \
|
||||
M(Bool, stop_refreshable_materialized_views_on_startup, false, R"(
|
||||
On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW \\<name\\> afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.
|
||||
)", 0) \
|
||||
@ -5823,6 +5823,7 @@ Experimental data deduplication for SELECT queries based on part UUIDs
|
||||
MAKE_OBSOLETE(M, Bool, allow_experimental_alter_materialized_view_structure, true) \
|
||||
MAKE_OBSOLETE(M, Bool, allow_experimental_shared_merge_tree, true) \
|
||||
MAKE_OBSOLETE(M, Bool, allow_experimental_database_replicated, true) \
|
||||
MAKE_OBSOLETE(M, Bool, allow_experimental_refreshable_materialized_view, true) \
|
||||
\
|
||||
MAKE_OBSOLETE(M, Milliseconds, async_insert_stale_timeout_ms, 0) \
|
||||
MAKE_OBSOLETE(M, StreamingHandleErrorMode, handle_kafka_error_mode, StreamingHandleErrorMode::DEFAULT) \
|
||||
@ -6201,6 +6202,16 @@ std::vector<std::string_view> Settings::getUnchangedNames() const
|
||||
return setting_names;
|
||||
}
|
||||
|
||||
std::vector<std::string_view> Settings::getChangedNames() const
|
||||
{
|
||||
std::vector<std::string_view> setting_names;
|
||||
for (const auto & setting : impl->allChanged())
|
||||
{
|
||||
setting_names.emplace_back(setting.getName());
|
||||
}
|
||||
return setting_names;
|
||||
}
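A hedged usage sketch of the new accessor, assuming a Settings instance is at hand (for example, obtained from a query context) and a logger named `log` exists; neither is defined in this hunk.

// Sketch: dump only the settings a user actually changed, e.g. for diagnostics.
const Settings & settings = context->getSettingsRef();   // `context` is an assumed ContextPtr
for (std::string_view name : settings.getChangedNames())
    LOG_DEBUG(log, "Changed setting: {}", name);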
|
||||
|
||||
void Settings::dumpToSystemSettingsColumns(MutableColumnsAndConstraints & params) const
|
||||
{
|
||||
MutableColumns & res_columns = params.res_columns;
|
||||
|
@ -134,6 +134,7 @@ struct Settings
|
||||
std::vector<std::string_view> getAllRegisteredNames() const;
|
||||
std::vector<std::string_view> getChangedAndObsoleteNames() const;
|
||||
std::vector<std::string_view> getUnchangedNames() const;
|
||||
std::vector<std::string_view> getChangedNames() const;
|
||||
|
||||
void dumpToSystemSettingsColumns(MutableColumnsAndConstraints & params) const;
|
||||
void dumpToMapColumn(IColumn * column, bool changed_only = true) const;
|
||||
|
@ -68,17 +68,18 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
||||
},
|
||||
{"24.10",
|
||||
{
|
||||
{"enforce_strict_identifier_format", false, false, "New setting."},
|
||||
{"enable_parsing_to_custom_serialization", false, true, "New setting"},
|
||||
{"mongodb_throw_on_unsupported_query", false, true, "New setting."},
|
||||
{"enable_parallel_replicas", false, false, "Parallel replicas with read tasks became the Beta tier feature."},
|
||||
{"parallel_replicas_mode", "read_tasks", "read_tasks", "This setting was introduced as a part of making parallel replicas feature Beta"},
|
||||
{"filesystem_cache_name", "", "", "Filesystem cache name to use for stateless table engines or data lakes"},
|
||||
{"restore_replace_external_dictionary_source_to_null", false, false, "New setting."},
|
||||
{"show_create_query_identifier_quoting_rule", "when_necessary", "when_necessary", "New setting."},
|
||||
{"show_create_query_identifier_quoting_style", "Backticks", "Backticks", "New setting."},
|
||||
{"output_format_native_write_json_as_string", false, false, "Add new setting to allow write JSON column as single String column in Native format"},
|
||||
{"output_format_binary_write_json_as_string", false, false, "Add new setting to write values of JSON type as JSON string in RowBinary output format"},
|
||||
{"input_format_binary_read_json_as_string", false, false, "Add new setting to read values of JSON type as JSON string in RowBinary input format"},
|
||||
{"enable_secure_identifiers", false, false, "New setting."},
|
||||
{"min_free_disk_bytes_to_perform_insert", 0, 0, "New setting."},
|
||||
{"min_free_disk_ratio_to_perform_insert", 0.0, 0.0, "New setting."},
|
||||
{"cloud_mode_database_engine", 1, 1, "A setting for ClickHouse Cloud"},
|
||||
@ -97,7 +98,10 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
||||
{"distributed_cache_read_alignment", 0, 0, "A setting for ClickHouse Cloud"},
|
||||
{"distributed_cache_max_unacked_inflight_packets", 10, 10, "A setting for ClickHouse Cloud"},
|
||||
{"distributed_cache_data_packet_ack_window", 5, 5, "A setting for ClickHouse Cloud"},
|
||||
{"input_format_orc_dictionary_as_low_cardinality", false, true, "Treat ORC dictionary encoded columns as LowCardinality columns while reading ORC files"},
|
||||
{"allow_experimental_refreshable_materialized_view", false, true, "Not experimental anymore"},
|
||||
{"max_parts_to_move", 1000, 1000, "New setting"},
|
||||
{"input_format_parquet_bloom_filter_push_down", false, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and bloom filter in the Parquet metadata."},
|
||||
{"date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands", false, false, "Dynamically trim the trailing zeros of datetime64 values to adjust the output scale to (0, 3, 6), corresponding to 'seconds', 'milliseconds', and 'microseconds'."}
|
||||
}
|
||||
},
|
||||
@ -111,7 +115,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
||||
{"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
|
||||
{"parallel_replicas_mark_segment_size", 128, 0, "Value for this setting now determined automatically"},
|
||||
{"database_replicated_allow_replicated_engine_arguments", 1, 0, "Don't allow explicit arguments by default"},
|
||||
{"database_replicated_allow_explicit_uuid", 0, 0, "Added a new setting to disallow explicitly specifying table UUID"},
|
||||
{"database_replicated_allow_explicit_uuid", 1, 0, "Added a new setting to disallow explicitly specifying table UUID"},
|
||||
{"parallel_replicas_local_plan", false, false, "Use local plan for local replica in a query with parallel replicas"},
|
||||
{"join_to_sort_minimum_perkey_rows", 0, 40, "The lower limit of per-key average rows in the right table to determine whether to rerange the right table by key in left or inner join. This setting ensures that the optimization is not applied for sparse table keys"},
|
||||
{"join_to_sort_maximum_table_rows", 0, 10000, "The maximum number of rows in the right table to determine whether to rerange the right table by key in left or inner join"},
|
||||
|
@ -36,8 +36,8 @@ public:
|
||||
|
||||
auto findByValue(const T & value) const
|
||||
{
|
||||
const auto it = value_to_name_map.find(value);
|
||||
if (it == std::end(value_to_name_map))
|
||||
auto it = value_to_name_map.find(value);
|
||||
if (it == value_to_name_map.end())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value {} in enum", toString(value));
|
||||
|
||||
return it;
|
||||
@ -58,7 +58,7 @@ public:
|
||||
bool getNameForValue(const T & value, StringRef & result) const
|
||||
{
|
||||
const auto it = value_to_name_map.find(value);
|
||||
if (it == std::end(value_to_name_map))
|
||||
if (it == value_to_name_map.end())
|
||||
return false;
|
||||
|
||||
result = it->second;
|
||||
|
@ -321,6 +321,8 @@ bool isUInt8(TYPE data_type) { return WhichDataType(data_type).isUInt8(); } \
|
||||
bool isUInt16(TYPE data_type) { return WhichDataType(data_type).isUInt16(); } \
|
||||
bool isUInt32(TYPE data_type) { return WhichDataType(data_type).isUInt32(); } \
|
||||
bool isUInt64(TYPE data_type) { return WhichDataType(data_type).isUInt64(); } \
|
||||
bool isUInt128(TYPE data_type) { return WhichDataType(data_type).isUInt128(); } \
|
||||
bool isUInt256(TYPE data_type) { return WhichDataType(data_type).isUInt256(); } \
|
||||
bool isNativeUInt(TYPE data_type) { return WhichDataType(data_type).isNativeUInt(); } \
|
||||
bool isUInt(TYPE data_type) { return WhichDataType(data_type).isUInt(); } \
|
||||
\
|
||||
@ -328,6 +330,8 @@ bool isInt8(TYPE data_type) { return WhichDataType(data_type).isInt8(); } \
|
||||
bool isInt16(TYPE data_type) { return WhichDataType(data_type).isInt16(); } \
|
||||
bool isInt32(TYPE data_type) { return WhichDataType(data_type).isInt32(); } \
|
||||
bool isInt64(TYPE data_type) { return WhichDataType(data_type).isInt64(); } \
|
||||
bool isInt128(TYPE data_type) { return WhichDataType(data_type).isInt128(); } \
|
||||
bool isInt256(TYPE data_type) { return WhichDataType(data_type).isInt256(); } \
|
||||
bool isNativeInt(TYPE data_type) { return WhichDataType(data_type).isNativeInt(); } \
|
||||
bool isInt(TYPE data_type) { return WhichDataType(data_type).isInt(); } \
|
||||
\
|
||||
|
@ -457,7 +457,9 @@ struct WhichDataType
|
||||
bool isUInt8(TYPE data_type); \
|
||||
bool isUInt16(TYPE data_type); \
|
||||
bool isUInt32(TYPE data_type); \
|
||||
bool isUInt64(TYPE data_type); \
|
||||
bool isUInt64(TYPE data_type);\
|
||||
bool isUInt128(TYPE data_type);\
|
||||
bool isUInt256(TYPE data_type); \
|
||||
bool isNativeUInt(TYPE data_type); \
|
||||
bool isUInt(TYPE data_type); \
|
||||
\
|
||||
@ -465,6 +467,8 @@ bool isInt8(TYPE data_type); \
|
||||
bool isInt16(TYPE data_type); \
|
||||
bool isInt32(TYPE data_type); \
|
||||
bool isInt64(TYPE data_type); \
|
||||
bool isInt128(TYPE data_type); \
|
||||
bool isInt256(TYPE data_type); \
|
||||
bool isNativeInt(TYPE data_type); \
|
||||
bool isInt(TYPE data_type); \
|
||||
\
|
||||
|
@ -111,9 +111,9 @@ std::future<IAsynchronousReader::Result> ThreadPoolReader::submit(Request reques
|
||||
/// RWF_NOWAIT flag may return 0 even when not at end of file.
|
||||
/// It can't be distinguished from the real eof, so we have to
|
||||
/// disable pread with nowait.
|
||||
static std::atomic<bool> has_pread_nowait_support = !hasBugInPreadV2();
|
||||
static const bool has_pread_nowait_support = !hasBugInPreadV2();
|
||||
|
||||
if (has_pread_nowait_support.load(std::memory_order_relaxed))
|
||||
if (has_pread_nowait_support)
|
||||
{
|
||||
/// It reports real time spent including the time spent while thread was preempted doing nothing.
|
||||
/// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables).
|
||||
@ -161,7 +161,8 @@ std::future<IAsynchronousReader::Result> ThreadPoolReader::submit(Request reques
|
||||
if (errno == ENOSYS || errno == EOPNOTSUPP)
|
||||
{
|
||||
/// No support for the syscall or the flag in the Linux kernel.
|
||||
has_pread_nowait_support.store(false, std::memory_order_relaxed);
|
||||
/// It shouldn't happen because we check the kernel version but let's
|
||||
/// fallback to the thread pool.
|
||||
break;
|
||||
}
|
||||
if (errno == EAGAIN)
|
||||
|
@ -31,7 +31,7 @@ CachedObjectStorage::CachedObjectStorage(
|
||||
|
||||
FileCache::Key CachedObjectStorage::getCacheKey(const std::string & path) const
|
||||
{
|
||||
return cache->createKeyForPath(path);
|
||||
return FileCacheKey::fromPath(path);
|
||||
}
|
||||
|
||||
ObjectStorageKey
|
||||
@ -71,7 +71,7 @@ std::unique_ptr<ReadBufferFromFileBase> CachedObjectStorage::readObject( /// NOL
|
||||
{
|
||||
if (cache->isInitialized())
|
||||
{
|
||||
auto cache_key = cache->createKeyForPath(object.remote_path);
|
||||
auto cache_key = FileCacheKey::fromPath(object.remote_path);
|
||||
auto global_context = Context::getGlobalContextInstance();
|
||||
auto modified_read_settings = read_settings.withNestedBuffer();
|
||||
|
||||
|
@ -92,12 +92,26 @@ std::unique_ptr<S3::Client> getClient(
|
||||
"Region should be explicitly specified for directory buckets");
|
||||
}
|
||||
|
||||
const Settings & local_settings = context->getSettingsRef();
|
||||
|
||||
int s3_max_redirects = static_cast<int>(global_settings[Setting::s3_max_redirects]);
|
||||
if (!for_disk_s3 && local_settings.isChanged("s3_max_redirects"))
|
||||
s3_max_redirects = static_cast<int>(local_settings[Setting::s3_max_redirects]);
|
||||
|
||||
int s3_retry_attempts = static_cast<int>(global_settings[Setting::s3_retry_attempts]);
|
||||
if (!for_disk_s3 && local_settings.isChanged("s3_retry_attempts"))
|
||||
s3_retry_attempts = static_cast<int>(local_settings[Setting::s3_retry_attempts]);
|
||||
|
||||
bool enable_s3_requests_logging = global_settings[Setting::enable_s3_requests_logging];
|
||||
if (!for_disk_s3 && local_settings.isChanged("enable_s3_requests_logging"))
|
||||
enable_s3_requests_logging = local_settings[Setting::enable_s3_requests_logging];
|
||||
|
||||
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
|
||||
auth_settings.region,
|
||||
context->getRemoteHostFilter(),
|
||||
static_cast<int>(global_settings[Setting::s3_max_redirects]),
|
||||
static_cast<int>(global_settings[Setting::s3_retry_attempts]),
|
||||
global_settings[Setting::enable_s3_requests_logging],
|
||||
s3_max_redirects,
|
||||
s3_retry_attempts,
|
||||
enable_s3_requests_logging,
|
||||
for_disk_s3,
|
||||
request_settings.get_request_throttler,
|
||||
request_settings.put_request_throttler,
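The hunk above repeats one pattern: start from the global (server-level) value and override it with the query-level value only when the query actually changed that setting and the client is not being built for a disk. A generic, self-contained sketch of that precedence rule, with every name invented for illustration:

#include <iostream>
#include <map>
#include <string>

// Toy stand-ins for global and query-level settings ("isChanged" mimics the real check).
struct ToySettings
{
    std::map<std::string, int> values;
    bool isChanged(const std::string & name) const { return values.count(name) > 0; }
    int get(const std::string & name, int fallback) const
    {
        auto it = values.find(name);
        return it == values.end() ? fallback : it->second;
    }
};

// Prefer the query-level value only if it was explicitly changed and we are not configuring a disk client.
static int effectiveValue(const ToySettings & global, const ToySettings & local,
                          const std::string & name, bool for_disk_s3)
{
    int value = global.get(name, 0);
    if (!for_disk_s3 && local.isChanged(name))
        value = local.get(name, value);
    return value;
}

int main()
{
    ToySettings global{{{"s3_max_redirects", 10}}};
    ToySettings query{{{"s3_max_redirects", 2}}};
    std::cout << effectiveValue(global, query, "s3_max_redirects", /*for_disk_s3=*/false) << '\n'; // 2
    std::cout << effectiveValue(global, query, "s3_max_redirects", /*for_disk_s3=*/true) << '\n';  // 10
}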
|
||||
|
@ -192,6 +192,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
|
||||
format_settings.parquet.case_insensitive_column_matching = settings[Setting::input_format_parquet_case_insensitive_column_matching];
|
||||
format_settings.parquet.preserve_order = settings[Setting::input_format_parquet_preserve_order];
|
||||
format_settings.parquet.filter_push_down = settings[Setting::input_format_parquet_filter_push_down];
|
||||
format_settings.parquet.bloom_filter_push_down = settings[Setting::input_format_parquet_bloom_filter_push_down];
|
||||
format_settings.parquet.use_native_reader = settings[Setting::input_format_parquet_use_native_reader];
|
||||
format_settings.parquet.allow_missing_columns = settings[Setting::input_format_parquet_allow_missing_columns];
|
||||
format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings[Setting::input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference];
|
||||
@ -266,9 +267,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
|
||||
format_settings.orc.allow_missing_columns = settings[Setting::input_format_orc_allow_missing_columns];
|
||||
format_settings.orc.row_batch_size = settings[Setting::input_format_orc_row_batch_size];
|
||||
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings[Setting::input_format_orc_skip_columns_with_unsupported_types_in_schema_inference];
|
||||
format_settings.orc.allow_missing_columns = settings[Setting::input_format_orc_allow_missing_columns];
|
||||
format_settings.orc.row_batch_size = settings[Setting::input_format_orc_row_batch_size];
|
||||
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings[Setting::input_format_orc_skip_columns_with_unsupported_types_in_schema_inference];
|
||||
format_settings.orc.dictionary_as_low_cardinality = settings[Setting::input_format_orc_dictionary_as_low_cardinality];
|
||||
format_settings.orc.case_insensitive_column_matching = settings[Setting::input_format_orc_case_insensitive_column_matching];
|
||||
format_settings.orc.output_string_as_string = settings[Setting::output_format_orc_string_as_string];
|
||||
format_settings.orc.output_compression_method = settings[Setting::output_format_orc_compression_method];
|
||||
|
@ -276,6 +276,7 @@ struct FormatSettings
|
||||
bool skip_columns_with_unsupported_types_in_schema_inference = false;
|
||||
bool case_insensitive_column_matching = false;
|
||||
bool filter_push_down = true;
|
||||
bool bloom_filter_push_down = true;
|
||||
bool use_native_reader = false;
|
||||
std::unordered_set<int> skip_row_groups = {};
|
||||
bool output_string_as_string = false;
|
||||
@ -419,6 +420,7 @@ struct FormatSettings
|
||||
bool filter_push_down = true;
|
||||
UInt64 output_row_index_stride = 10'000;
|
||||
String reader_time_zone_name = "GMT";
|
||||
bool dictionary_as_low_cardinality = true;
|
||||
double output_dictionary_key_size_threshold = 0.0;
|
||||
} orc{};
|
||||
|
||||
|
@ -110,7 +110,7 @@ private:
|
||||
};
|
||||
|
||||
|
||||
const google::protobuf::Descriptor *
|
||||
ProtobufSchemas::DescriptorHolder
|
||||
ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope, const String & google_protos_path)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
@ -119,10 +119,10 @@ ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info, Wi
|
||||
it = importers
|
||||
.emplace(
|
||||
info.schemaDirectory(),
|
||||
std::make_unique<ImporterWithSourceTree>(info.schemaDirectory(), google_protos_path, with_envelope))
|
||||
std::make_shared<ImporterWithSourceTree>(info.schemaDirectory(), google_protos_path, with_envelope))
|
||||
.first;
|
||||
auto * importer = it->second.get();
|
||||
return importer->import(info.schemaPath(), info.messageName());
|
||||
return DescriptorHolder(it->second, importer->import(info.schemaPath(), info.messageName()));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -57,14 +57,31 @@ public:
|
||||
// Clear cached protobuf schemas
|
||||
void clear();
|
||||
|
||||
/// Parses the format schema, then parses the corresponding proto file, and returns the descriptor of the message type.
|
||||
/// The function never returns nullptr, it throws an exception if it cannot load or parse the file.
|
||||
const google::protobuf::Descriptor *
|
||||
class ImporterWithSourceTree;
|
||||
struct DescriptorHolder
|
||||
{
|
||||
DescriptorHolder(std::shared_ptr<ImporterWithSourceTree> importer_, const google::protobuf::Descriptor * message_descriptor_)
|
||||
: importer(std::move(importer_))
|
||||
, message_descriptor(message_descriptor_)
|
||||
{}
|
||||
private:
|
||||
std::shared_ptr<ImporterWithSourceTree> importer;
|
||||
public:
|
||||
const google::protobuf::Descriptor * message_descriptor;
|
||||
};
|
||||
|
||||
/// Parses the format schema, then parses the corresponding proto file, and
/// returns a holder (since the descriptor is only valid while the
/// ImporterWithSourceTree is valid):
///
/// {ImporterWithSourceTree, protobuf::Descriptor - descriptor of the message type}.
///
/// The function always returns a valid message descriptor; it throws an exception if it cannot load or parse the file.
DescriptorHolder
|
||||
getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope, const String & google_protos_path);
|
||||
|
||||
private:
|
||||
class ImporterWithSourceTree;
|
||||
std::unordered_map<String, std::unique_ptr<ImporterWithSourceTree>> importers;
|
||||
std::unordered_map<String, std::shared_ptr<ImporterWithSourceTree>> importers;
|
||||
std::mutex mutex;
|
||||
};
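The point of DescriptorHolder is lifetime: the raw google::protobuf::Descriptor* stays valid only while its owning ImporterWithSourceTree lives, so the holder carries a shared_ptr to the importer alongside the pointer. A generic, self-contained sketch of that owner-plus-raw-pointer pattern (names invented, not the ClickHouse classes):

#include <iostream>
#include <memory>
#include <string>

struct Importer            // stand-in for ImporterWithSourceTree
{
    std::string descriptor = "message Foo {}";   // stand-in for the parsed Descriptor
};

struct Holder              // stand-in for DescriptorHolder
{
    std::shared_ptr<Importer> owner;             // keeps the importer (and thus the descriptor) alive
    const std::string * descriptor;              // raw pointer into data owned by `owner`
};

static Holder load()
{
    auto importer = std::make_shared<Importer>();
    return Holder{importer, &importer->descriptor};   // safe: the shared_ptr travels with the pointer
}

int main()
{
    Holder h = load();                           // the importer stays alive inside the holder
    std::cout << *h.descriptor << '\n';          // still valid here
}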
|
||||
|
||||
|
@ -3864,26 +3864,32 @@ std::unique_ptr<ProtobufSerializer> ProtobufSerializer::create(
|
||||
const Strings & column_names,
|
||||
const DataTypes & data_types,
|
||||
std::vector<size_t> & missing_column_indices,
|
||||
const google::protobuf::Descriptor & message_descriptor,
|
||||
const ProtobufSchemas::DescriptorHolder & descriptor,
|
||||
bool with_length_delimiter,
|
||||
bool with_envelope,
|
||||
bool flatten_google_wrappers,
|
||||
ProtobufReader & reader)
|
||||
{
|
||||
return ProtobufSerializerBuilder(reader).buildMessageSerializer(column_names, data_types, missing_column_indices, message_descriptor, with_length_delimiter, with_envelope, flatten_google_wrappers);
|
||||
return ProtobufSerializerBuilder(reader).buildMessageSerializer(
|
||||
column_names, data_types, missing_column_indices,
|
||||
*descriptor.message_descriptor,
|
||||
with_length_delimiter, with_envelope, flatten_google_wrappers);
|
||||
}
|
||||
|
||||
std::unique_ptr<ProtobufSerializer> ProtobufSerializer::create(
|
||||
const Strings & column_names,
|
||||
const DataTypes & data_types,
|
||||
const google::protobuf::Descriptor & message_descriptor,
|
||||
const ProtobufSchemas::DescriptorHolder & descriptor,
|
||||
bool with_length_delimiter,
|
||||
bool with_envelope,
|
||||
bool defaults_for_nullable_google_wrappers,
|
||||
ProtobufWriter & writer)
|
||||
{
|
||||
std::vector<size_t> missing_column_indices;
|
||||
return ProtobufSerializerBuilder(writer).buildMessageSerializer(column_names, data_types, missing_column_indices, message_descriptor, with_length_delimiter, with_envelope, defaults_for_nullable_google_wrappers);
|
||||
return ProtobufSerializerBuilder(writer).buildMessageSerializer(
|
||||
column_names, data_types, missing_column_indices,
|
||||
*descriptor.message_descriptor,
|
||||
with_length_delimiter, with_envelope, defaults_for_nullable_google_wrappers);
|
||||
}
|
||||
|
||||
NamesAndTypesList protobufSchemaToCHSchema(const google::protobuf::Descriptor * message_descriptor, bool skip_unsupported_fields)
|
||||
|
@ -4,7 +4,8 @@
|
||||
|
||||
#if USE_PROTOBUF
|
||||
# include <Columns/IColumn.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
# include <Core/NamesAndTypes.h>
|
||||
# include <Formats/ProtobufSchemas.h>
|
||||
|
||||
|
||||
namespace google::protobuf { class Descriptor; }
|
||||
@ -39,7 +40,7 @@ public:
|
||||
const Strings & column_names,
|
||||
const DataTypes & data_types,
|
||||
std::vector<size_t> & missing_column_indices,
|
||||
const google::protobuf::Descriptor & message_descriptor,
|
||||
const ProtobufSchemas::DescriptorHolder & descriptor,
|
||||
bool with_length_delimiter,
|
||||
bool with_envelope,
|
||||
bool flatten_google_wrappers,
|
||||
@ -48,7 +49,7 @@ public:
|
||||
static std::unique_ptr<ProtobufSerializer> create(
|
||||
const Strings & column_names,
|
||||
const DataTypes & data_types,
|
||||
const google::protobuf::Descriptor & message_descriptor,
|
||||
const ProtobufSchemas::DescriptorHolder & descriptor,
|
||||
bool with_length_delimiter,
|
||||
bool with_envelope,
|
||||
bool defaults_for_nullable_google_wrappers,
|
||||
|
@ -84,11 +84,9 @@ void assertResponseIsOk(const String & uri, Poco::Net::HTTPResponse & response,
|
||||
? ErrorCodes::RECEIVED_ERROR_TOO_MANY_REQUESTS
|
||||
: ErrorCodes::RECEIVED_ERROR_FROM_REMOTE_IO_SERVER;
|
||||
|
||||
std::stringstream body; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
body.exceptions(std::ios::failbit);
|
||||
body << istr.rdbuf();
|
||||
|
||||
throw HTTPException(code, uri, status, response.getReason(), body.str());
|
||||
istr.seekg(0, std::ios::end);
|
||||
size_t body_length = istr.tellg();
|
||||
throw HTTPException(code, uri, status, response.getReason(), body_length);
|
||||
}
|
||||
}
|
||||
|
||||
@ -97,13 +95,13 @@ Exception HTTPException::makeExceptionMessage(
|
||||
const std::string & uri,
|
||||
Poco::Net::HTTPResponse::HTTPStatus http_status,
|
||||
const std::string & reason,
|
||||
const std::string & body)
|
||||
size_t body_length)
|
||||
{
|
||||
return Exception(code,
|
||||
"Received error from remote server {}. "
|
||||
"HTTP status code: {} {}, "
|
||||
"body: {}",
|
||||
uri, static_cast<int>(http_status), reason, body);
|
||||
"HTTP status code: {} '{}', "
|
||||
"body length: {} bytes",
|
||||
uri, static_cast<int>(http_status), reason, body_length);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -27,9 +27,9 @@ public:
|
||||
const std::string & uri,
|
||||
Poco::Net::HTTPResponse::HTTPStatus http_status_,
|
||||
const std::string & reason,
|
||||
const std::string & body
|
||||
size_t body_length = 0
|
||||
)
|
||||
: Exception(makeExceptionMessage(code, uri, http_status_, reason, body))
|
||||
: Exception(makeExceptionMessage(code, uri, http_status_, reason, body_length))
|
||||
, http_status(http_status_)
|
||||
{}
|
||||
|
||||
@ -46,7 +46,7 @@ private:
|
||||
const std::string & uri,
|
||||
Poco::Net::HTTPResponse::HTTPStatus http_status,
|
||||
const std::string & reason,
|
||||
const std::string & body);
|
||||
size_t body_length);
|
||||
|
||||
const char * name() const noexcept override { return "DB::HTTPException"; }
|
||||
const char * className() const noexcept override { return "DB::HTTPException"; }
|
||||
|
@ -423,8 +423,7 @@ std::unique_ptr<ReadBuffer> ReadWriteBufferFromHTTP::initialize()
|
||||
ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
|
||||
current_uri.toString(),
|
||||
Poco::Net::HTTPResponse::HTTP_REQUESTED_RANGE_NOT_SATISFIABLE,
|
||||
reason,
|
||||
"");
|
||||
reason);
|
||||
}
|
||||
throw Exception(
|
||||
ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
|
||||
@ -549,8 +548,7 @@ size_t ReadWriteBufferFromHTTP::readBigAt(char * to, size_t n, size_t offset, co
|
||||
ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
|
||||
current_uri.toString(),
|
||||
Poco::Net::HTTPResponse::HTTP_REQUESTED_RANGE_NOT_SATISFIABLE,
|
||||
reason,
|
||||
"");
|
||||
reason);
|
||||
}
|
||||
|
||||
copyFromIStreamWithProgressCallback(*result.response_stream, to, n, progress_callback, &bytes_copied, &is_canceled);
|
||||
|
@ -122,11 +122,6 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s
|
||||
query_limit = std::make_unique<FileCacheQueryLimit>();
|
||||
}
|
||||
|
||||
FileCache::Key FileCache::createKeyForPath(const String & path)
|
||||
{
|
||||
return Key(path);
|
||||
}
|
||||
|
||||
const FileCache::UserInfo & FileCache::getCommonUser()
|
||||
{
|
||||
static UserInfo user(getCommonUserID(), 0);
|
||||
@ -1083,7 +1078,7 @@ void FileCache::freeSpaceRatioKeepingThreadFunc()
|
||||
if (eviction_candidates.size() > 0)
|
||||
{
|
||||
LOG_TRACE(log, "Current usage {}/{} in size, {}/{} in elements count "
|
||||
"(trying to keep size ration at {} and elements ratio at {}). "
|
||||
"(trying to keep size ratio at {} and elements ratio at {}). "
|
||||
"Collected {} eviction candidates, "
|
||||
"skipped {} candidates while iterating",
|
||||
main_priority->getSize(lock), size_limit,
|
||||
@ -1168,7 +1163,7 @@ void FileCache::removeFileSegment(const Key & key, size_t offset, const UserID &
|
||||
|
||||
void FileCache::removePathIfExists(const String & path, const UserID & user_id)
|
||||
{
|
||||
removeKeyIfExists(createKeyForPath(path), user_id);
|
||||
removeKeyIfExists(Key::fromPath(path), user_id);
|
||||
}
|
||||
|
||||
void FileCache::removeAllReleasable(const UserID & user_id)
|
||||
|
@ -88,8 +88,6 @@ public:
|
||||
|
||||
const String & getBasePath() const;
|
||||
|
||||
static Key createKeyForPath(const String & path);
|
||||
|
||||
static const UserInfo & getCommonUser();
|
||||
|
||||
static const UserInfo & getInternalUser();
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "FileCacheFactory.h"
|
||||
#include "FileCache.h"
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -43,6 +44,16 @@ FileCacheFactory::CacheByName FileCacheFactory::getAll()
|
||||
return caches_by_name;
|
||||
}
|
||||
|
||||
FileCachePtr FileCacheFactory::get(const std::string & cache_name)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
auto it = caches_by_name.find(cache_name);
|
||||
if (it == caches_by_name.end())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no cache by name `{}`", cache_name);
|
||||
return it->second->cache;
|
||||
}
|
||||
|
||||
FileCachePtr FileCacheFactory::getOrCreate(
|
||||
const std::string & cache_name,
|
||||
const FileCacheSettings & file_cache_settings,
|
||||
@ -202,4 +213,20 @@ void FileCacheFactory::clear()
|
||||
caches_by_name.clear();
|
||||
}
|
||||
|
||||
void FileCacheFactory::loadDefaultCaches(const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
Poco::Util::AbstractConfiguration::Keys cache_names;
|
||||
config.keys(FILECACHE_DEFAULT_CONFIG_PATH, cache_names);
|
||||
auto * log = &Poco::Logger::get("FileCacheFactory");
|
||||
LOG_DEBUG(log, "Will load {} caches from default cache config", cache_names.size());
|
||||
for (const auto & name : cache_names)
|
||||
{
|
||||
FileCacheSettings settings;
|
||||
const auto & config_path = fmt::format("{}.{}", FILECACHE_DEFAULT_CONFIG_PATH, name);
|
||||
settings.loadFromConfig(config, config_path);
|
||||
auto cache = getOrCreate(name, settings, config_path);
|
||||
cache->initialize();
|
||||
LOG_DEBUG(log, "Loaded cache `{}` from default cache config", name);
|
||||
}
|
||||
}
|
||||
}
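A hedged sketch of how the new get()/loadDefaultCaches() pair might be used at server startup; the singleton-style accessor FileCacheFactory::instance(), the `config` object and the cache name "s3_cache" are assumptions for illustration and do not appear in this hunk.

// Sketch only: load every cache declared under the default <filesystem_caches> config path,
// then look one up by name; get() throws BAD_ARGUMENTS for an unknown name.
FileCacheFactory::instance().loadDefaultCaches(config);            // `config` is an assumed AbstractConfiguration
FileCachePtr cache = FileCacheFactory::instance().get("s3_cache"); // already initialized by loadDefaultCaches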
|
||||
|
@ -44,6 +44,8 @@ public:
|
||||
const FileCacheSettings & file_cache_settings,
|
||||
const std::string & config_path);
|
||||
|
||||
FileCachePtr get(const std::string & cache_name);
|
||||
|
||||
FileCachePtr create(
|
||||
const std::string & cache_name,
|
||||
const FileCacheSettings & file_cache_settings,
|
||||
@ -53,8 +55,12 @@ public:
|
||||
|
||||
FileCacheDataPtr getByName(const std::string & cache_name);
|
||||
|
||||
void loadDefaultCaches(const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
void updateSettingsFromConfig(const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
void remove(FileCachePtr cache);
|
||||
|
||||
void clear();
|
||||
|
||||
private:
|
||||
|
@ -12,11 +12,6 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
FileCacheKey::FileCacheKey(const std::string & path)
|
||||
: key(sipHash128(path.data(), path.size()))
|
||||
{
|
||||
}
|
||||
|
||||
FileCacheKey::FileCacheKey(const UInt128 & key_)
|
||||
: key(key_)
|
||||
{
|
||||
@ -32,6 +27,16 @@ FileCacheKey FileCacheKey::random()
|
||||
return FileCacheKey(UUIDHelpers::generateV4().toUnderType());
|
||||
}
|
||||
|
||||
FileCacheKey FileCacheKey::fromPath(const std::string & path)
|
||||
{
|
||||
return FileCacheKey(sipHash128(path.data(), path.size()));
|
||||
}
|
||||
|
||||
FileCacheKey FileCacheKey::fromKey(const UInt128 & key)
|
||||
{
|
||||
return FileCacheKey(key);
|
||||
}
|
||||
|
||||
FileCacheKey FileCacheKey::fromKeyString(const std::string & key_str)
|
||||
{
|
||||
if (key_str.size() != 32)
|
||||
|
@ -14,16 +14,16 @@ struct FileCacheKey
|
||||
|
||||
FileCacheKey() = default;
|
||||
|
||||
explicit FileCacheKey(const std::string & path);
|
||||
|
||||
explicit FileCacheKey(const UInt128 & key_);
|
||||
|
||||
static FileCacheKey random();
|
||||
static FileCacheKey fromPath(const std::string & path);
|
||||
static FileCacheKey fromKey(const UInt128 & key);
|
||||
static FileCacheKey fromKeyString(const std::string & key_str);
|
||||
|
||||
bool operator==(const FileCacheKey & other) const { return key == other.key; }
|
||||
bool operator<(const FileCacheKey & other) const { return key < other.key; }
|
||||
|
||||
static FileCacheKey fromKeyString(const std::string & key_str);
|
||||
private:
|
||||
explicit FileCacheKey(const UInt128 & key_);
|
||||
};
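A hedged usage sketch of the named constructors above; the example path and hex string are illustrative only. With the UInt128 constructor now private, call sites have to state which kind of input they hold:

// Sketch: the public ways to obtain a key after this change.
FileCacheKey from_path = FileCacheKey::fromPath("/var/lib/clickhouse/data/file.bin");      // hashes the path
FileCacheKey from_str  = FileCacheKey::fromKeyString("00112233445566778899aabbccddeeff");  // 32 hex characters expected
FileCacheKey random_key = FileCacheKey::random();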
|
||||
|
||||
using FileCacheKeyAndOffset = std::pair<FileCacheKey, size_t>;
|
||||
|
@ -15,10 +15,12 @@ static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024;
|
||||
static constexpr double FILECACHE_DEFAULT_FREE_SPACE_SIZE_RATIO = 0; /// Disabled.
|
||||
static constexpr double FILECACHE_DEFAULT_FREE_SPACE_ELEMENTS_RATIO = 0; /// Disabled.
|
||||
static constexpr int FILECACHE_DEFAULT_FREE_SPACE_REMOVE_BATCH = 10;
|
||||
static constexpr auto FILECACHE_DEFAULT_CONFIG_PATH = "filesystem_caches";
|
||||
|
||||
class FileCache;
|
||||
using FileCachePtr = std::shared_ptr<FileCache>;
|
||||
|
||||
struct FileCacheSettings;
|
||||
struct FileCacheKey;
|
||||
|
||||
}
|
||||
|
@ -451,12 +451,12 @@ void executeQuery(
|
||||
return;
|
||||
}
|
||||
|
||||
DataStreams input_streams;
|
||||
input_streams.reserve(plans.size());
|
||||
Headers input_headers;
|
||||
input_headers.reserve(plans.size());
|
||||
for (auto & plan : plans)
|
||||
input_streams.emplace_back(plan->getCurrentDataStream());
|
||||
input_headers.emplace_back(plan->getCurrentHeader());
|
||||
|
||||
auto union_step = std::make_unique<UnionStep>(std::move(input_streams));
|
||||
auto union_step = std::make_unique<UnionStep>(std::move(input_headers));
|
||||
query_plan.unitePlans(std::move(union_step), std::move(plans));
|
||||
}
|
||||
|
||||
@ -653,16 +653,16 @@ void executeQueryWithParallelReplicas(
|
||||
auto remote_plan = std::make_unique<QueryPlan>();
|
||||
remote_plan->addStep(std::move(read_from_remote));
|
||||
|
||||
DataStreams input_streams;
|
||||
input_streams.reserve(2);
|
||||
input_streams.emplace_back(local_plan->getCurrentDataStream());
|
||||
input_streams.emplace_back(remote_plan->getCurrentDataStream());
|
||||
Headers input_headers;
|
||||
input_headers.reserve(2);
|
||||
input_headers.emplace_back(local_plan->getCurrentHeader());
|
||||
input_headers.emplace_back(remote_plan->getCurrentHeader());
|
||||
|
||||
std::vector<QueryPlanPtr> plans;
|
||||
plans.emplace_back(std::move(local_plan));
|
||||
plans.emplace_back(std::move(remote_plan));
|
||||
|
||||
auto union_step = std::make_unique<UnionStep>(std::move(input_streams));
|
||||
auto union_step = std::make_unique<UnionStep>(std::move(input_headers));
|
||||
query_plan.unitePlans(std::move(union_step), std::move(plans));
|
||||
}
|
||||
else
|
||||
|
@ -1056,7 +1056,7 @@ static std::shared_ptr<IJoin> tryCreateJoin(
|
||||
static std::shared_ptr<IJoin> chooseJoinAlgorithm(
|
||||
std::shared_ptr<TableJoin> analyzed_join, const ColumnsWithTypeAndName & left_sample_columns, std::unique_ptr<QueryPlan> & joined_plan, ContextPtr context)
|
||||
{
|
||||
Block right_sample_block = joined_plan->getCurrentDataStream().header;
|
||||
Block right_sample_block = joined_plan->getCurrentHeader();
|
||||
const auto & join_algorithms = analyzed_join->getEnabledJoinAlgorithms();
|
||||
for (const auto alg : join_algorithms)
|
||||
{
|
||||
@ -1109,13 +1109,13 @@ static std::unique_ptr<QueryPlan> buildJoinedPlan(
|
||||
rename_dag.getOutputs()[pos] = &alias;
|
||||
}
|
||||
}
|
||||
rename_dag.appendInputsForUnusedColumns(joined_plan->getCurrentDataStream().header);
|
||||
auto rename_step = std::make_unique<ExpressionStep>(joined_plan->getCurrentDataStream(), std::move(rename_dag));
|
||||
rename_dag.appendInputsForUnusedColumns(joined_plan->getCurrentHeader());
|
||||
auto rename_step = std::make_unique<ExpressionStep>(joined_plan->getCurrentHeader(), std::move(rename_dag));
|
||||
rename_step->setStepDescription("Rename joined columns");
|
||||
joined_plan->addStep(std::move(rename_step));
|
||||
}
|
||||
|
||||
auto joined_actions_step = std::make_unique<ExpressionStep>(joined_plan->getCurrentDataStream(), std::move(joined_block_actions));
|
||||
auto joined_actions_step = std::make_unique<ExpressionStep>(joined_plan->getCurrentHeader(), std::move(joined_block_actions));
|
||||
joined_actions_step->setStepDescription("Joined actions");
|
||||
joined_plan->addStep(std::move(joined_actions_step));
|
||||
|
||||
@ -1197,11 +1197,11 @@ JoinPtr SelectQueryExpressionAnalyzer::makeJoin(
|
||||
|
||||
joined_plan = buildJoinedPlan(getContext(), join_element, *analyzed_join, query_options);
|
||||
|
||||
const ColumnsWithTypeAndName & right_columns = joined_plan->getCurrentDataStream().header.getColumnsWithTypeAndName();
|
||||
const ColumnsWithTypeAndName & right_columns = joined_plan->getCurrentHeader().getColumnsWithTypeAndName();
|
||||
std::tie(left_convert_actions, right_convert_actions) = analyzed_join->createConvertingActions(left_columns, right_columns);
|
||||
if (right_convert_actions)
|
||||
{
|
||||
auto converting_step = std::make_unique<ExpressionStep>(joined_plan->getCurrentDataStream(), std::move(*right_convert_actions));
|
||||
auto converting_step = std::make_unique<ExpressionStep>(joined_plan->getCurrentHeader(), std::move(*right_convert_actions));
|
||||
converting_step->setStepDescription("Convert joined columns");
|
||||
joined_plan->addStep(std::move(converting_step));
|
||||
}
|
||||
|
@ -182,10 +182,10 @@ void IInterpreterUnionOrSelectQuery::addAdditionalPostFilter(QueryPlan & plan) c
|
||||
if (!ast)
|
||||
return;
|
||||
|
||||
auto dag = makeAdditionalPostFilter(ast, context, plan.getCurrentDataStream().header);
|
||||
auto dag = makeAdditionalPostFilter(ast, context, plan.getCurrentHeader());
|
||||
std::string filter_name = dag.getOutputs().back()->result_name;
|
||||
auto filter_step = std::make_unique<FilterStep>(
|
||||
plan.getCurrentDataStream(), std::move(dag), std::move(filter_name), true);
|
||||
plan.getCurrentHeader(), std::move(dag), std::move(filter_name), true);
|
||||
filter_step->setStepDescription("Additional result filter");
|
||||
plan.addStep(std::move(filter_step));
|
||||
}
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <Common/StringUtils.h>
|
||||
#include <Common/atomicRename.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/getRandomASCIIString.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/randomSeed.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
@ -109,7 +110,6 @@ namespace Setting
|
||||
extern const SettingsBool allow_experimental_database_materialized_postgresql;
|
||||
extern const SettingsBool allow_experimental_full_text_index;
|
||||
extern const SettingsBool allow_experimental_inverted_index;
|
||||
extern const SettingsBool allow_experimental_refreshable_materialized_view;
|
||||
extern const SettingsBool allow_experimental_statistics;
|
||||
extern const SettingsBool allow_experimental_vector_similarity_index;
|
||||
extern const SettingsBool allow_materialized_view_with_bad_select;
|
||||
@ -1576,10 +1576,6 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
|
||||
|
||||
if (create.refresh_strategy)
|
||||
{
|
||||
if (!getContext()->getSettingsRef()[Setting::allow_experimental_refreshable_materialized_view])
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
|
||||
"Refreshable materialized views are experimental. Enable allow_experimental_refreshable_materialized_view to use");
|
||||
|
||||
AddDefaultDatabaseVisitor visitor(getContext(), current_database);
|
||||
visitor.visit(*create.refresh_strategy);
|
||||
}
|
||||
@ -1668,7 +1664,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
|
||||
if (need_add_to_database && !database)
|
||||
throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(database_name));
|
||||
|
||||
if (create.replace_table)
|
||||
if (create.replace_table
|
||||
|| (create.replace_view && (database->getEngineName() == "Atomic" || database->getEngineName() == "Replicated")))
|
||||
{
|
||||
chassert(!ddl_guard);
|
||||
return doCreateOrReplaceTable(create, properties, mode);
|
||||
@ -1978,15 +1975,19 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
|
||||
|
||||
|
||||
UInt64 name_hash = sipHash64(create.getDatabase() + create.getTable());
|
||||
UInt16 random_suffix = thread_local_rng();
|
||||
String random_suffix;
|
||||
if (auto txn = current_context->getZooKeeperMetadataTransaction())
|
||||
{
|
||||
/// Avoid different table name on database replicas
|
||||
random_suffix = sipHash64(txn->getTaskZooKeeperPath());
|
||||
UInt16 hashed_zk_path = sipHash64(txn->getTaskZooKeeperPath());
|
||||
random_suffix = getHexUIntLowercase(hashed_zk_path);
|
||||
}
|
||||
create.setTable(fmt::format("_tmp_replace_{}_{}",
|
||||
getHexUIntLowercase(name_hash),
|
||||
getHexUIntLowercase(random_suffix)));
|
||||
else
|
||||
{
|
||||
random_suffix = getRandomASCIIString(/*length=*/4);
|
||||
}
|
||||
|
||||
create.setTable(fmt::format("_tmp_replace_{}_{}", getHexUIntLowercase(name_hash), random_suffix));
|
||||
|
||||
ast_drop->setTable(create.getTable());
|
||||
ast_drop->is_dictionary = create.is_dictionary;
|
||||
@ -2029,16 +2030,16 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
|
||||
|
||||
auto ast_rename = std::make_shared<ASTRenameQuery>(ASTRenameQuery::Elements{std::move(elem)});
|
||||
ast_rename->dictionary = create.is_dictionary;
|
||||
if (create.create_or_replace)
|
||||
if (create.create_or_replace || create.replace_view)
|
||||
{
|
||||
/// CREATE OR REPLACE TABLE
|
||||
/// CREATE OR REPLACE TABLE/VIEW
|
||||
/// Will execute ordinary RENAME instead of EXCHANGE if the target table does not exist
|
||||
ast_rename->rename_if_cannot_exchange = true;
|
||||
ast_rename->exchange = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// REPLACE TABLE
|
||||
/// REPLACE TABLE/VIEW
|
||||
/// Will execute EXCHANGE query and fail if the target table does not exist
|
||||
ast_rename->exchange = true;
|
||||
}
|
||||
|
@ -85,7 +85,8 @@ public:
|
||||
void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr) const override;
|
||||
|
||||
/// Check access right, validate definer statement and replace `CURRENT USER` with actual name.
|
||||
static void processSQLSecurityOption(ContextPtr context_, ASTSQLSecurity & sql_security, bool is_materialized_view = false, bool skip_check_permissions = false);
|
||||
static void processSQLSecurityOption(
|
||||
ContextPtr context_, ASTSQLSecurity & sql_security, bool is_materialized_view = false, bool skip_check_permissions = false);
|
||||
|
||||
private:
|
||||
struct TableProperties
|
||||
|
@ -128,29 +128,29 @@ void InterpreterSelectIntersectExceptQuery::buildQueryPlan(QueryPlan & query_pla
|
||||
|
||||
size_t num_plans = nested_interpreters.size();
|
||||
std::vector<std::unique_ptr<QueryPlan>> plans(num_plans);
|
||||
DataStreams data_streams(num_plans);
|
||||
Headers headers(num_plans);
|
||||
|
||||
for (size_t i = 0; i < num_plans; ++i)
|
||||
{
|
||||
plans[i] = std::make_unique<QueryPlan>();
|
||||
nested_interpreters[i]->buildQueryPlan(*plans[i]);
|
||||
|
||||
if (!blocksHaveEqualStructure(plans[i]->getCurrentDataStream().header, result_header))
|
||||
if (!blocksHaveEqualStructure(plans[i]->getCurrentHeader(), result_header))
|
||||
{
|
||||
auto actions_dag = ActionsDAG::makeConvertingActions(
|
||||
plans[i]->getCurrentDataStream().header.getColumnsWithTypeAndName(),
|
||||
plans[i]->getCurrentHeader().getColumnsWithTypeAndName(),
|
||||
result_header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Position);
|
||||
auto converting_step = std::make_unique<ExpressionStep>(plans[i]->getCurrentDataStream(), std::move(actions_dag));
|
||||
auto converting_step = std::make_unique<ExpressionStep>(plans[i]->getCurrentHeader(), std::move(actions_dag));
|
||||
converting_step->setStepDescription("Conversion before UNION");
|
||||
plans[i]->addStep(std::move(converting_step));
|
||||
}
|
||||
|
||||
data_streams[i] = plans[i]->getCurrentDataStream();
|
||||
headers[i] = plans[i]->getCurrentHeader();
|
||||
}
|
||||
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
auto step = std::make_unique<IntersectOrExceptStep>(std::move(data_streams), final_operator, settings[Setting::max_threads]);
|
||||
auto step = std::make_unique<IntersectOrExceptStep>(std::move(headers), final_operator, settings[Setting::max_threads]);
|
||||
query_plan.unitePlans(std::move(step), std::move(plans));
|
||||
|
||||
const auto & query = query_ptr->as<ASTSelectIntersectExceptQuery &>();
|
||||
@ -161,7 +161,7 @@ void InterpreterSelectIntersectExceptQuery::buildQueryPlan(QueryPlan & query_pla
|
||||
SizeLimits limits(settings[Setting::max_rows_in_distinct], settings[Setting::max_bytes_in_distinct], settings[Setting::distinct_overflow_mode]);
|
||||
|
||||
auto distinct_step = std::make_unique<DistinctStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
limits,
|
||||
0,
|
||||
result_header.getNames(),
|
||||
|
@ -1087,15 +1087,15 @@ void InterpreterSelectQuery::buildQueryPlan(QueryPlan & query_plan)
|
||||
executeImpl(query_plan, std::move(input_pipe));
|
||||
|
||||
/// We must guarantee that result structure is the same as in getSampleBlock()
|
||||
if (!blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header))
|
||||
if (!blocksHaveEqualStructure(query_plan.getCurrentHeader(), result_header))
|
||||
{
|
||||
auto convert_actions_dag = ActionsDAG::makeConvertingActions(
|
||||
query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
|
||||
query_plan.getCurrentHeader().getColumnsWithTypeAndName(),
|
||||
result_header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Name,
|
||||
true);
|
||||
|
||||
auto converting = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(convert_actions_dag));
|
||||
auto converting = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(convert_actions_dag));
|
||||
query_plan.addStep(std::move(converting));
|
||||
}
|
||||
|
||||
@ -1606,7 +1606,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
if (expressions.filter_info)
|
||||
{
|
||||
auto row_level_security_step = std::make_unique<FilterStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
expressions.filter_info->actions.clone(),
|
||||
expressions.filter_info->column_name,
|
||||
expressions.filter_info->do_remove_column);
|
||||
@ -1620,7 +1620,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
if (expressions.prewhere_info->row_level_filter)
|
||||
{
|
||||
auto row_level_filter_step = std::make_unique<FilterStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
expressions.prewhere_info->row_level_filter->clone(),
|
||||
expressions.prewhere_info->row_level_column_name,
|
||||
true);
|
||||
@ -1630,7 +1630,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
}
|
||||
|
||||
auto prewhere_step = std::make_unique<FilterStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
expressions.prewhere_info->prewhere_actions.clone(),
|
||||
expressions.prewhere_info->prewhere_column_name,
|
||||
expressions.prewhere_info->remove_prewhere_column);
|
||||
@ -1732,7 +1732,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
if (expressions.filter_info)
|
||||
{
|
||||
auto row_level_security_step = std::make_unique<FilterStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
expressions.filter_info->actions.clone(),
|
||||
expressions.filter_info->column_name,
|
||||
expressions.filter_info->do_remove_column);
|
||||
@ -1744,7 +1744,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
const auto add_filter_step = [&](auto & new_filter_info, const std::string & description)
|
||||
{
|
||||
auto filter_step = std::make_unique<FilterStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
std::move(new_filter_info->actions),
|
||||
new_filter_info->column_name,
|
||||
new_filter_info->do_remove_column);
|
||||
@ -1766,7 +1766,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
{
|
||||
QueryPlanStepPtr array_join_step
|
||||
= std::make_unique<ArrayJoinStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
*expressions.array_join,
|
||||
settings[Setting::enable_unaligned_array_join],
|
||||
settings[Setting::max_block_size]);
|
||||
@ -1787,7 +1787,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
if (expressions.join->isFilled())
|
||||
{
|
||||
QueryPlanStepPtr filled_join_step
|
||||
= std::make_unique<FilledJoinStep>(query_plan.getCurrentDataStream(), expressions.join, settings[Setting::max_block_size]);
|
||||
= std::make_unique<FilledJoinStep>(query_plan.getCurrentHeader(), expressions.join, settings[Setting::max_block_size]);
|
||||
|
||||
filled_join_step->setStepDescription("JOIN");
|
||||
query_plan.addStep(std::move(filled_join_step));
|
||||
@ -1809,7 +1809,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
SortingStep::Settings sort_settings(*context);
|
||||
|
||||
auto sorting_step = std::make_unique<SortingStep>(
|
||||
plan.getCurrentDataStream(),
|
||||
plan.getCurrentHeader(),
|
||||
std::move(order_descr),
|
||||
0 /* LIMIT */, sort_settings);
|
||||
sorting_step->setStepDescription(fmt::format("Sort {} before JOIN", join_pos));
|
||||
@ -1821,7 +1821,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
= [&settings, crosswise_connection](QueryPlan & plan, const Names & key_names, JoinTableSide join_pos)
|
||||
{
|
||||
auto creating_set_step = std::make_unique<CreateSetAndFilterOnTheFlyStep>(
|
||||
plan.getCurrentDataStream(),
|
||||
plan.getCurrentHeader(),
|
||||
key_names,
|
||||
settings[Setting::max_rows_in_set_to_optimize_join],
|
||||
crosswise_connection,
|
||||
@ -1858,8 +1858,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
/// It doesn't hold such a guarantee for streams with const keys.
/// Note: it also doesn't work with the read-in-order optimization.
/// No checks here because read in order is not applied if we have `CreateSetAndFilterOnTheFlyStep` in the pipeline between the reading and sorting steps.
bool has_non_const_keys = has_non_const(query_plan.getCurrentDataStream().header, join_clause.key_names_left)
&& has_non_const(joined_plan->getCurrentDataStream().header, join_clause.key_names_right);
bool has_non_const_keys = has_non_const(query_plan.getCurrentHeader(), join_clause.key_names_left)
&& has_non_const(joined_plan->getCurrentHeader(), join_clause.key_names_right);

if (settings[Setting::max_rows_in_set_to_optimize_join] > 0 && join_type_allows_filtering && has_non_const_keys)
{
@ -1878,8 +1878,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
}
|
||||
|
||||
QueryPlanStepPtr join_step = std::make_unique<JoinStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
joined_plan->getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
joined_plan->getCurrentHeader(),
|
||||
expressions.join,
|
||||
settings[Setting::max_block_size],
|
||||
max_streams,
|
||||
@ -2152,7 +2152,7 @@ static void executeMergeAggregatedImpl(
|
||||
auto grouping_sets_params = getAggregatorGroupingSetsParams(aggregation_keys_list, keys);
|
||||
|
||||
auto merging_aggregated = std::make_unique<MergingAggregatedStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
params,
|
||||
grouping_sets_params,
|
||||
final,
|
||||
@ -2710,7 +2710,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
|
||||
/// Aliases in table declaration.
|
||||
if (processing_stage == QueryProcessingStage::FetchColumns && alias_actions)
|
||||
{
|
||||
auto table_aliases = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), alias_actions->clone());
|
||||
auto table_aliases = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), alias_actions->clone());
|
||||
table_aliases->setStepDescription("Add table aliases");
|
||||
query_plan.addStep(std::move(table_aliases));
|
||||
}
|
||||
@ -2720,10 +2720,10 @@ void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsA
|
||||
{
|
||||
auto dag = expression->dag.clone();
|
||||
if (expression->project_input)
|
||||
dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
|
||||
dag.appendInputsForUnusedColumns(query_plan.getCurrentHeader());
|
||||
|
||||
auto where_step = std::make_unique<FilterStep>(
|
||||
query_plan.getCurrentDataStream(), std::move(dag), getSelectQuery().where()->getColumnName(), remove_filter);
|
||||
query_plan.getCurrentHeader(), std::move(dag), getSelectQuery().where()->getColumnName(), remove_filter);
|
||||
|
||||
where_step->setStepDescription("WHERE");
|
||||
query_plan.addStep(std::move(where_step));
|
||||
@ -2826,7 +2826,7 @@ void InterpreterSelectQuery::executeAggregation(
|
||||
&& (settings[Setting::distributed_aggregation_memory_efficient] || settings[Setting::enable_memory_bound_merging_of_aggregation_results]);
|
||||
|
||||
auto aggregating_step = std::make_unique<AggregatingStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
std::move(aggregator_params),
|
||||
std::move(grouping_sets_params),
|
||||
final,
|
||||
@ -2870,10 +2870,10 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Actions
|
||||
{
|
||||
auto dag = expression->dag.clone();
|
||||
if (expression->project_input)
|
||||
dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
|
||||
dag.appendInputsForUnusedColumns(query_plan.getCurrentHeader());
|
||||
|
||||
auto having_step
|
||||
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), std::move(dag), getSelectQuery().having()->getColumnName(), remove_filter);
|
||||
= std::make_unique<FilterStep>(query_plan.getCurrentHeader(), std::move(dag), getSelectQuery().having()->getColumnName(), remove_filter);
|
||||
|
||||
having_step->setStepDescription("HAVING");
|
||||
query_plan.addStep(std::move(having_step));
|
||||
@ -2893,13 +2893,13 @@ void InterpreterSelectQuery::executeTotalsAndHaving(
|
||||
{
|
||||
dag = expression->dag.clone();
|
||||
if (expression->project_input)
|
||||
dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
|
||||
dag->appendInputsForUnusedColumns(query_plan.getCurrentHeader());
|
||||
}
|
||||
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
|
||||
auto totals_having_step = std::make_unique<TotalsHavingStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
query_analyzer->aggregates(),
|
||||
overflow_row,
|
||||
std::move(dag),
|
||||
@ -2928,9 +2928,9 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPlan & query_plan, Modific
|
||||
|
||||
QueryPlanStepPtr step;
|
||||
if (modificator == Modificator::ROLLUP)
|
||||
step = std::make_unique<RollupStep>(query_plan.getCurrentDataStream(), std::move(params), final, settings[Setting::group_by_use_nulls]);
|
||||
step = std::make_unique<RollupStep>(query_plan.getCurrentHeader(), std::move(params), final, settings[Setting::group_by_use_nulls]);
|
||||
else if (modificator == Modificator::CUBE)
|
||||
step = std::make_unique<CubeStep>(query_plan.getCurrentDataStream(), std::move(params), final, settings[Setting::group_by_use_nulls]);
|
||||
step = std::make_unique<CubeStep>(query_plan.getCurrentHeader(), std::move(params), final, settings[Setting::group_by_use_nulls]);
|
||||
|
||||
query_plan.addStep(std::move(step));
|
||||
}
|
||||
@ -2942,9 +2942,9 @@ void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const Act
|
||||
|
||||
auto dag = expression->dag.clone();
|
||||
if (expression->project_input)
|
||||
dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
|
||||
dag.appendInputsForUnusedColumns(query_plan.getCurrentHeader());
|
||||
|
||||
auto expression_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(dag));
|
||||
auto expression_step = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(dag));
|
||||
|
||||
expression_step->setStepDescription(description);
|
||||
query_plan.addStep(std::move(expression_step));
|
||||
@ -3028,7 +3028,7 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan)
|
||||
SortingStep::Settings sort_settings(*context);
|
||||
|
||||
auto sorting_step = std::make_unique<SortingStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
window.full_sort_description,
|
||||
window.partition_by,
|
||||
0 /* LIMIT */,
|
||||
@ -3042,7 +3042,7 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan)
|
||||
const bool streams_fan_out
|
||||
= settings[Setting::query_plan_enable_multithreading_after_window_functions] && ((i + 1) == windows_sorted.size());
|
||||
|
||||
auto window_step = std::make_unique<WindowStep>(query_plan.getCurrentDataStream(), window, window.window_functions, streams_fan_out);
|
||||
auto window_step = std::make_unique<WindowStep>(query_plan.getCurrentHeader(), window, window.window_functions, streams_fan_out);
|
||||
window_step->setStepDescription("Window step for window '" + window.window_name + "'");
|
||||
|
||||
query_plan.addStep(std::move(window_step));
|
||||
@ -3055,7 +3055,7 @@ void InterpreterSelectQuery::executeOrderOptimized(QueryPlan & query_plan, Input
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
|
||||
auto finish_sorting_step = std::make_unique<SortingStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
input_sorting_info->sort_description_for_merging,
|
||||
output_order_descr,
|
||||
settings[Setting::max_block_size],
|
||||
@ -3086,7 +3086,7 @@ void InterpreterSelectQuery::executeOrder(QueryPlan & query_plan, InputOrderInfo
|
||||
|
||||
/// Merge the sorted blocks.
|
||||
auto sorting_step = std::make_unique<SortingStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
output_order_descr,
|
||||
limit,
|
||||
sort_settings);
|
||||
@ -3105,7 +3105,7 @@ void InterpreterSelectQuery::executeMergeSorted(QueryPlan & query_plan, const st
|
||||
const auto exact_rows_before_limit = context->getSettingsRef()[Setting::exact_rows_before_limit];
|
||||
|
||||
auto merging_sorted = std::make_unique<SortingStep>(
|
||||
query_plan.getCurrentDataStream(), std::move(sort_description), max_block_size, limit, exact_rows_before_limit);
|
||||
query_plan.getCurrentHeader(), std::move(sort_description), max_block_size, limit, exact_rows_before_limit);
|
||||
merging_sorted->setStepDescription("Merge sorted streams " + description);
|
||||
query_plan.addStep(std::move(merging_sorted));
|
||||
}
|
||||
@ -3140,7 +3140,7 @@ void InterpreterSelectQuery::executeDistinct(QueryPlan & query_plan, bool before
|
||||
SizeLimits limits(settings[Setting::max_rows_in_distinct], settings[Setting::max_bytes_in_distinct], settings[Setting::distinct_overflow_mode]);
|
||||
|
||||
auto distinct_step = std::make_unique<DistinctStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
limits,
|
||||
limit_for_distinct,
|
||||
columns,
|
||||
@ -3175,7 +3175,7 @@ void InterpreterSelectQuery::executePreLimit(QueryPlan & query_plan, bool do_not
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
|
||||
auto limit
|
||||
= std::make_unique<LimitStep>(query_plan.getCurrentDataStream(), limit_length, limit_offset, settings[Setting::exact_rows_before_limit]);
|
||||
= std::make_unique<LimitStep>(query_plan.getCurrentHeader(), limit_length, limit_offset, settings[Setting::exact_rows_before_limit]);
|
||||
if (do_not_skip_offset)
|
||||
limit->setStepDescription("preliminary LIMIT (with OFFSET)");
|
||||
else
|
||||
@ -3199,7 +3199,7 @@ void InterpreterSelectQuery::executeLimitBy(QueryPlan & query_plan)
|
||||
UInt64 length = getLimitUIntValue(query.limitByLength(), context, "LIMIT");
|
||||
UInt64 offset = (query.limitByOffset() ? getLimitUIntValue(query.limitByOffset(), context, "OFFSET") : 0);
|
||||
|
||||
auto limit_by = std::make_unique<LimitByStep>(query_plan.getCurrentDataStream(), length, offset, columns);
|
||||
auto limit_by = std::make_unique<LimitByStep>(query_plan.getCurrentHeader(), length, offset, columns);
|
||||
query_plan.addStep(std::move(limit_by));
|
||||
}
|
||||
|
||||
@ -3224,7 +3224,7 @@ void InterpreterSelectQuery::executeWithFill(QueryPlan & query_plan)
|
||||
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
auto filling_step = std::make_unique<FillingStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
std::move(sort_description),
|
||||
std::move(fill_description),
|
||||
interpolate_descr,
|
||||
@ -3271,7 +3271,7 @@ void InterpreterSelectQuery::executeLimit(QueryPlan & query_plan)
|
||||
}
|
||||
|
||||
auto limit = std::make_unique<LimitStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
limit_length, limit_offset, always_read_till_end, query.limit_with_ties, order_descr);
|
||||
|
||||
if (query.limit_with_ties)
|
||||
@ -3292,7 +3292,7 @@ void InterpreterSelectQuery::executeOffset(QueryPlan & query_plan)
|
||||
UInt64 limit_offset;
|
||||
std::tie(limit_length, limit_offset) = getLimitLengthAndOffset(query, context);
|
||||
|
||||
auto offsets_step = std::make_unique<OffsetStep>(query_plan.getCurrentDataStream(), limit_offset);
|
||||
auto offsets_step = std::make_unique<OffsetStep>(query_plan.getCurrentHeader(), limit_offset);
|
||||
query_plan.addStep(std::move(offsets_step));
|
||||
}
|
||||
}
|
||||
@ -3302,7 +3302,7 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan)
|
||||
if (!context->getSettingsRef()[Setting::extremes])
|
||||
return;
|
||||
|
||||
auto extremes_step = std::make_unique<ExtremesStep>(query_plan.getCurrentDataStream());
|
||||
auto extremes_step = std::make_unique<ExtremesStep>(query_plan.getCurrentHeader());
|
||||
query_plan.addStep(std::move(extremes_step));
|
||||
}
|
||||
|
||||
@ -3313,7 +3313,7 @@ void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_p
|
||||
if (!subqueries.empty())
|
||||
{
|
||||
auto step = std::make_unique<DelayedCreatingSetsStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
std::move(subqueries),
|
||||
context);
|
||||
|
||||
|
@ -213,7 +213,7 @@ Block InterpreterSelectQueryAnalyzer::getSampleBlock(const QueryTreeNodePtr & qu
Block InterpreterSelectQueryAnalyzer::getSampleBlock()
{
planner.buildQueryPlanIfNeeded();
return planner.getQueryPlan().getCurrentDataStream().header;
return planner.getQueryPlan().getCurrentHeader();
}

BlockIO InterpreterSelectQueryAnalyzer::execute()

@ -316,29 +316,29 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
|
||||
else
|
||||
{
|
||||
std::vector<std::unique_ptr<QueryPlan>> plans(num_plans);
|
||||
DataStreams data_streams(num_plans);
|
||||
Headers headers(num_plans);
|
||||
|
||||
for (size_t i = 0; i < num_plans; ++i)
|
||||
{
|
||||
plans[i] = std::make_unique<QueryPlan>();
|
||||
nested_interpreters[i]->buildQueryPlan(*plans[i]);
|
||||
|
||||
if (!blocksHaveEqualStructure(plans[i]->getCurrentDataStream().header, result_header))
|
||||
if (!blocksHaveEqualStructure(plans[i]->getCurrentHeader(), result_header))
|
||||
{
|
||||
auto actions_dag = ActionsDAG::makeConvertingActions(
|
||||
plans[i]->getCurrentDataStream().header.getColumnsWithTypeAndName(),
|
||||
plans[i]->getCurrentHeader().getColumnsWithTypeAndName(),
|
||||
result_header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Position);
|
||||
auto converting_step = std::make_unique<ExpressionStep>(plans[i]->getCurrentDataStream(), std::move(actions_dag));
|
||||
auto converting_step = std::make_unique<ExpressionStep>(plans[i]->getCurrentHeader(), std::move(actions_dag));
|
||||
converting_step->setStepDescription("Conversion before UNION");
|
||||
plans[i]->addStep(std::move(converting_step));
|
||||
}
|
||||
|
||||
data_streams[i] = plans[i]->getCurrentDataStream();
|
||||
headers[i] = plans[i]->getCurrentHeader();
|
||||
}
|
||||
|
||||
auto max_threads = settings[Setting::max_threads];
|
||||
auto union_step = std::make_unique<UnionStep>(std::move(data_streams), max_threads);
|
||||
auto union_step = std::make_unique<UnionStep>(std::move(headers), max_threads);
|
||||
|
||||
query_plan.unitePlans(std::move(union_step), std::move(plans));
|
||||
|
||||
@ -349,7 +349,7 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
|
||||
SizeLimits limits(settings[Setting::max_rows_in_distinct], settings[Setting::max_bytes_in_distinct], settings[Setting::distinct_overflow_mode]);
|
||||
|
||||
auto distinct_step = std::make_unique<DistinctStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
limits,
|
||||
0,
|
||||
result_header.getNames(),
|
||||
@ -364,13 +364,13 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
|
||||
if (settings[Setting::limit] > 0)
|
||||
{
|
||||
auto limit = std::make_unique<LimitStep>(
|
||||
query_plan.getCurrentDataStream(), settings[Setting::limit], settings[Setting::offset], settings[Setting::exact_rows_before_limit]);
|
||||
query_plan.getCurrentHeader(), settings[Setting::limit], settings[Setting::offset], settings[Setting::exact_rows_before_limit]);
|
||||
limit->setStepDescription("LIMIT OFFSET for SETTINGS");
|
||||
query_plan.addStep(std::move(limit));
|
||||
}
|
||||
else
|
||||
{
|
||||
auto offset = std::make_unique<OffsetStep>(query_plan.getCurrentDataStream(), settings[Setting::offset]);
|
||||
auto offset = std::make_unique<OffsetStep>(query_plan.getCurrentHeader(), settings[Setting::offset]);
|
||||
offset->setStepDescription("OFFSET for SETTINGS");
|
||||
query_plan.addStep(std::move(offset));
|
||||
}
|
||||
|
@ -1313,17 +1313,17 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v
|
||||
{
|
||||
auto dag = step->actions()->dag.clone();
|
||||
if (step->actions()->project_input)
|
||||
dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header);
|
||||
dag.appendInputsForUnusedColumns(plan.getCurrentHeader());
|
||||
/// Execute DELETEs.
|
||||
plan.addStep(std::make_unique<FilterStep>(plan.getCurrentDataStream(), std::move(dag), stage.filter_column_names[i], false));
|
||||
plan.addStep(std::make_unique<FilterStep>(plan.getCurrentHeader(), std::move(dag), stage.filter_column_names[i], false));
|
||||
}
|
||||
else
|
||||
{
|
||||
auto dag = step->actions()->dag.clone();
|
||||
if (step->actions()->project_input)
|
||||
dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header);
|
||||
dag.appendInputsForUnusedColumns(plan.getCurrentHeader());
|
||||
/// Execute UPDATE or final projection.
|
||||
plan.addStep(std::make_unique<ExpressionStep>(plan.getCurrentDataStream(), std::move(dag)));
|
||||
plan.addStep(std::make_unique<ExpressionStep>(plan.getCurrentHeader(), std::move(dag)));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -119,7 +119,7 @@ FutureSetFromSubquery::FutureSetFromSubquery(
|
||||
auto size_limits = getSizeLimitsForSet(settings);
|
||||
set_and_key->set
|
||||
= std::make_shared<Set>(size_limits, settings[Setting::use_index_for_in_with_subqueries_max_values], settings[Setting::transform_null_in]);
|
||||
set_and_key->set->setHeader(source->getCurrentDataStream().header.getColumnsWithTypeAndName());
|
||||
set_and_key->set->setHeader(source->getCurrentHeader().getColumnsWithTypeAndName());
|
||||
}
|
||||
|
||||
FutureSetFromSubquery::FutureSetFromSubquery(
|
||||
@ -149,7 +149,7 @@ SetPtr FutureSetFromSubquery::get() const
|
||||
void FutureSetFromSubquery::setQueryPlan(std::unique_ptr<QueryPlan> source_)
|
||||
{
|
||||
source = std::move(source_);
|
||||
set_and_key->set->setHeader(source->getCurrentDataStream().header.getColumnsWithTypeAndName());
|
||||
set_and_key->set->setHeader(source->getCurrentHeader().getColumnsWithTypeAndName());
|
||||
}
|
||||
|
||||
DataTypes FutureSetFromSubquery::getTypes() const
|
||||
@ -170,7 +170,7 @@ std::unique_ptr<QueryPlan> FutureSetFromSubquery::build(const ContextPtr & conte
|
||||
return nullptr;
|
||||
|
||||
auto creating_set = std::make_unique<CreatingSetStep>(
|
||||
plan->getCurrentDataStream(),
|
||||
plan->getCurrentHeader(),
|
||||
set_and_key,
|
||||
external_table,
|
||||
SizeLimits(settings[Setting::max_rows_to_transfer], settings[Setting::max_bytes_to_transfer], settings[Setting::transfer_overflow_mode]),
|
||||
|
@ -238,6 +238,8 @@ public:

const Columns & getOrderedSet() const { return ordered_set; }

const std::vector<KeyTuplePositionMapping> & getIndexesMapping() const { return indexes_mapping; }

private:
// If all arguments in tuple are key columns, we can optimize NOT IN when there is only one element.
bool has_all_keys;

@ -5,6 +5,7 @@
#include <Common/MemoryTrackerBlockerInThread.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/FailPoint.h>
#include <Common/FieldVisitorToString.h>

#include <Interpreters/AsynchronousInsertQueue.h>
#include <Interpreters/Cache/QueryCache.h>
@ -155,7 +156,7 @@ namespace Setting
extern const SettingsBool use_query_cache;
extern const SettingsBool wait_for_async_insert;
extern const SettingsSeconds wait_for_async_insert_timeout;
extern const SettingsBool enable_secure_identifiers;
extern const SettingsBool enforce_strict_identifier_format;
}

namespace ErrorCodes
@ -565,6 +566,25 @@ void logQueryFinish(
query_span->addAttributeIfNotZero("clickhouse.written_rows", elem.written_rows);
query_span->addAttributeIfNotZero("clickhouse.written_bytes", elem.written_bytes);
query_span->addAttributeIfNotZero("clickhouse.memory_usage", elem.memory_usage);

if (context)
{
std::string user_name = context->getUserName();
query_span->addAttribute("clickhouse.user", user_name);
}

if (settings[Setting::log_query_settings])
{
auto changed_settings_names = settings.getChangedNames();
for (const auto & name : changed_settings_names)
{
Field value = settings.get(name);
String value_str = convertFieldToString(value);

query_span->addAttribute(fmt::format("clickhouse.setting.{}", name), value_str);

}
}
query_span->finish();
}
}
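Editor's note: the block added above attaches the current user and every changed setting to the query's tracing span. As a rough standalone illustration of the resulting attribute naming only (the real code calls addAttribute on the query span object; the values below are invented):

#include <cstdio>
#include <map>
#include <string>

int main()
{
    // Pretend these are the settings changed for the query (Settings::getChangedNames() + Settings::get()).
    std::map<std::string, std::string> changed_settings = {{"max_threads", "8"}, {"log_query_settings", "1"}};

    std::printf("clickhouse.user = %s\n", "default");
    for (const auto & [name, value] : changed_settings)
        std::printf("clickhouse.setting.%s = %s\n", name.c_str(), value.c_str());
    return 0;
}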
@ -999,12 +1019,12 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
InterpreterSetQuery::applySettingsFromQuery(ast, context);
validateAnalyzerSettings(ast, settings[Setting::allow_experimental_analyzer]);

if (settings[Setting::enable_secure_identifiers])
if (settings[Setting::enforce_strict_identifier_format])
{
WriteBufferFromOwnString buf;
IAST::FormatSettings enable_secure_identifiers_settings(buf, true);
enable_secure_identifiers_settings.enable_secure_identifiers = true;
ast->format(enable_secure_identifiers_settings);
IAST::FormatSettings enforce_strict_identifier_format_settings(buf, true);
enforce_strict_identifier_format_settings.enforce_strict_identifier_format = true;
ast->format(enforce_strict_identifier_format_settings);
}

if (auto * insert_query = ast->as<ASTInsertQuery>())

@ -372,7 +372,7 @@ TEST_F(FileCacheTest, LRUPolicy)
|
||||
std::cerr << "Step 1\n";
|
||||
auto cache = DB::FileCache("1", settings);
|
||||
cache.initialize();
|
||||
auto key = DB::FileCache::createKeyForPath("key1");
|
||||
auto key = DB::FileCacheKey::fromPath("key1");
|
||||
|
||||
auto get_or_set = [&](size_t offset, size_t size)
|
||||
{
|
||||
@ -736,7 +736,7 @@ TEST_F(FileCacheTest, LRUPolicy)
|
||||
|
||||
auto cache2 = DB::FileCache("2", settings);
|
||||
cache2.initialize();
|
||||
auto key = DB::FileCache::createKeyForPath("key1");
|
||||
auto key = DB::FileCacheKey::fromPath("key1");
|
||||
|
||||
/// Get [2, 29]
|
||||
assertEqual(
|
||||
@ -755,7 +755,7 @@ TEST_F(FileCacheTest, LRUPolicy)
|
||||
fs::create_directories(settings2.base_path);
|
||||
auto cache2 = DB::FileCache("3", settings2);
|
||||
cache2.initialize();
|
||||
auto key = DB::FileCache::createKeyForPath("key1");
|
||||
auto key = DB::FileCacheKey::fromPath("key1");
|
||||
|
||||
/// Get [0, 24]
|
||||
assertEqual(
|
||||
@ -770,7 +770,7 @@ TEST_F(FileCacheTest, LRUPolicy)
|
||||
|
||||
auto cache = FileCache("4", settings);
|
||||
cache.initialize();
|
||||
const auto key = FileCache::createKeyForPath("key10");
|
||||
const auto key = FileCacheKey::fromPath("key10");
|
||||
const auto key_path = cache.getKeyPath(key, user);
|
||||
|
||||
cache.removeAllReleasable(user.user_id);
|
||||
@ -794,7 +794,7 @@ TEST_F(FileCacheTest, LRUPolicy)
|
||||
|
||||
auto cache = DB::FileCache("5", settings);
|
||||
cache.initialize();
|
||||
const auto key = FileCache::createKeyForPath("key10");
|
||||
const auto key = FileCacheKey::fromPath("key10");
|
||||
const auto key_path = cache.getKeyPath(key, user);
|
||||
|
||||
cache.removeAllReleasable(user.user_id);
|
||||
@ -833,7 +833,7 @@ TEST_F(FileCacheTest, writeBuffer)
|
||||
segment_settings.kind = FileSegmentKind::Ephemeral;
|
||||
segment_settings.unbounded = true;
|
||||
|
||||
auto cache_key = FileCache::createKeyForPath(key);
|
||||
auto cache_key = FileCacheKey::fromPath(key);
|
||||
auto holder = cache.set(cache_key, 0, 3, segment_settings, user);
|
||||
/// The same is done in TemporaryDataOnDisk::createStreamToCacheFile.
|
||||
std::filesystem::create_directories(cache.getKeyPath(cache_key, user));
|
||||
@ -961,7 +961,7 @@ TEST_F(FileCacheTest, temporaryData)
|
||||
const auto user = FileCache::getCommonUser();
|
||||
auto tmp_data_scope = std::make_shared<TemporaryDataOnDiskScope>(nullptr, &file_cache, TemporaryDataOnDiskSettings{});
|
||||
|
||||
auto some_data_holder = file_cache.getOrSet(FileCache::createKeyForPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{}, 0, user);
|
||||
auto some_data_holder = file_cache.getOrSet(FileCacheKey::fromPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{}, 0, user);
|
||||
|
||||
{
|
||||
ASSERT_EQ(some_data_holder->size(), 5);
|
||||
@ -1103,7 +1103,7 @@ TEST_F(FileCacheTest, CachedReadBuffer)
|
||||
auto cache = std::make_shared<DB::FileCache>("8", settings);
|
||||
cache->initialize();
|
||||
|
||||
auto key = cache->createKeyForPath(file_path);
|
||||
auto key = DB::FileCacheKey::fromPath(file_path);
|
||||
const auto user = FileCache::getCommonUser();
|
||||
|
||||
{
|
||||
@ -1219,7 +1219,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
|
||||
{
|
||||
auto cache = DB::FileCache(std::to_string(++file_cache_name), settings);
|
||||
cache.initialize();
|
||||
auto key = FileCache::createKeyForPath("key1");
|
||||
auto key = FileCacheKey::fromPath("key1");
|
||||
|
||||
auto add_range = [&](size_t offset, size_t size)
|
||||
{
|
||||
@ -1342,7 +1342,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
|
||||
|
||||
std::string data1(15, '*');
|
||||
auto file1 = write_file("test1", data1);
|
||||
auto key1 = cache->createKeyForPath(file1);
|
||||
auto key1 = DB::FileCacheKey::fromPath(file1);
|
||||
|
||||
read_and_check(file1, key1, data1);
|
||||
|
||||
@ -1358,7 +1358,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
|
||||
|
||||
std::string data2(10, '*');
|
||||
auto file2 = write_file("test2", data2);
|
||||
auto key2 = cache->createKeyForPath(file2);
|
||||
auto key2 = DB::FileCacheKey::fromPath(file2);
|
||||
|
||||
read_and_check(file2, key2, data2);
|
||||
|
||||
|
@ -58,9 +58,16 @@ protected:
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " PARTITION " << (settings.hilite ? hilite_none : "");
|
||||
partition->formatImpl(settings, state, frame);
|
||||
}
|
||||
|
||||
if (!part_name.empty())
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " PART " << (settings.hilite ? hilite_none : "")
|
||||
<< quoteString(part_name);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct ASTCheckAllTablesQuery : public ASTQueryWithOutput
|
||||
{
|
||||
|
||||
|
@ -8,6 +8,7 @@
#include <Poco/String.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/SipHash.h>
#include <Common/StringUtils.h>
#include <algorithm>

namespace DB
@ -265,14 +266,14 @@ void IAST::FormatSettings::writeIdentifier(const String & name, bool ambiguous)

void IAST::FormatSettings::checkIdentifier(const String & name) const
{
if (enable_secure_identifiers)
if (enforce_strict_identifier_format)
{
bool is_secure_identifier = std::all_of(name.begin(), name.end(), [](char ch) { return std::isalnum(ch) || ch == '_'; });
if (!is_secure_identifier)
bool is_word_char_identifier = std::all_of(name.begin(), name.end(), isWordCharASCII);
if (!is_word_char_identifier)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Not a secure identifier: `{}`, a secure identifier must contain only underscore and alphanumeric characters",
"Identifier '{}' contains characters other than alphanumeric and cannot be when enforce_strict_identifier_format is enabled",
name);
}
}

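Editor's note: the practical difference in the rewritten check above is that std::isalnum is locale-sensitive (and undefined for negative char values), while a word-character test accepts exactly ASCII letters, digits and underscore. A self-contained approximation of the new behaviour, using an illustrative helper rather than the real isWordCharASCII from Common/StringUtils:

#include <algorithm>
#include <cassert>
#include <string>

// Illustrative re-implementation of an ASCII word-character test (assumption, not the ClickHouse helper).
static bool isWordCharAsciiSketch(char ch)
{
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '_';
}

static bool isStrictIdentifier(const std::string & name)
{
    return std::all_of(name.begin(), name.end(), isWordCharAsciiSketch);
}

int main()
{
    assert(isStrictIdentifier("user_id_42"));
    assert(!isStrictIdentifier("weird-name"));   // '-' is rejected when the setting is enabled
    assert(!isStrictIdentifier("имя"));          // non-ASCII identifiers are rejected too
    return 0;
}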
@ -202,7 +202,7 @@ public:
char nl_or_ws; /// Newline or whitespace.
LiteralEscapingStyle literal_escaping_style;
bool print_pretty_type_names;
bool enable_secure_identifiers;
bool enforce_strict_identifier_format;

explicit FormatSettings(
WriteBuffer & ostr_,
@ -213,7 +213,7 @@ public:
bool show_secrets_ = true,
LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular,
bool print_pretty_type_names_ = false,
bool enable_secure_identifiers_ = false)
bool enforce_strict_identifier_format_ = false)
: ostr(ostr_)
, one_line(one_line_)
, hilite(hilite_)
@ -223,7 +223,7 @@ public:
, nl_or_ws(one_line ? ' ' : '\n')
, literal_escaping_style(literal_escaping_style_)
, print_pretty_type_names(print_pretty_type_names_)
, enable_secure_identifiers(enable_secure_identifiers_)
, enforce_strict_identifier_format(enforce_strict_identifier_format_)
{
}

@ -237,7 +237,7 @@ public:
, nl_or_ws(other.nl_or_ws)
, literal_escaping_style(other.literal_escaping_style)
, print_pretty_type_names(other.print_pretty_type_names)
, enable_secure_identifiers(other.enable_secure_identifiers)
, enforce_strict_identifier_format(other.enforce_strict_identifier_format)
{
}

@ -384,9 +384,9 @@ void addExpressionStep(QueryPlan & query_plan,
|
||||
{
|
||||
auto actions = std::move(expression_actions->dag);
|
||||
if (expression_actions->project_input)
|
||||
actions.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
|
||||
actions.appendInputsForUnusedColumns(query_plan.getCurrentHeader());
|
||||
|
||||
auto expression_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(actions));
|
||||
auto expression_step = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(actions));
|
||||
appendSetsFromActionsDAG(expression_step->getExpression(), useful_sets);
|
||||
expression_step->setStepDescription(step_description);
|
||||
query_plan.addStep(std::move(expression_step));
|
||||
@ -399,9 +399,9 @@ void addFilterStep(QueryPlan & query_plan,
|
||||
{
|
||||
auto actions = std::move(filter_analysis_result.filter_actions->dag);
|
||||
if (filter_analysis_result.filter_actions->project_input)
|
||||
actions.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
|
||||
actions.appendInputsForUnusedColumns(query_plan.getCurrentHeader());
|
||||
|
||||
auto where_step = std::make_unique<FilterStep>(query_plan.getCurrentDataStream(),
|
||||
auto where_step = std::make_unique<FilterStep>(query_plan.getCurrentHeader(),
|
||||
std::move(actions),
|
||||
filter_analysis_result.filter_column_name,
|
||||
filter_analysis_result.remove_filter_column);
|
||||
@ -507,7 +507,7 @@ void addAggregationStep(QueryPlan & query_plan,
|
||||
}
|
||||
|
||||
auto aggregating_step = std::make_unique<AggregatingStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
aggregator_params,
|
||||
aggregation_analysis_result.grouping_sets_parameters_list,
|
||||
query_analysis_result.aggregate_final,
|
||||
@ -570,7 +570,7 @@ void addMergingAggregatedStep(QueryPlan & query_plan,
|
||||
}
|
||||
|
||||
auto merging_aggregated = std::make_unique<MergingAggregatedStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
params,
|
||||
aggregation_analysis_result.grouping_sets_parameters_list,
|
||||
query_analysis_result.aggregate_final,
|
||||
@ -605,11 +605,11 @@ void addTotalsHavingStep(QueryPlan & query_plan,
|
||||
{
|
||||
actions = std::move(having_analysis_result.filter_actions->dag);
|
||||
if (having_analysis_result.filter_actions->project_input)
|
||||
actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
|
||||
actions->appendInputsForUnusedColumns(query_plan.getCurrentHeader());
|
||||
}
|
||||
|
||||
auto totals_having_step = std::make_unique<TotalsHavingStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
aggregation_analysis_result.aggregate_descriptions,
|
||||
query_analysis_result.aggregate_overflow_row,
|
||||
std::move(actions),
|
||||
@ -647,13 +647,13 @@ void addCubeOrRollupStepIfNeeded(QueryPlan & query_plan,
|
||||
if (query_node.isGroupByWithRollup())
|
||||
{
|
||||
auto rollup_step = std::make_unique<RollupStep>(
|
||||
query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings[Setting::group_by_use_nulls]);
|
||||
query_plan.getCurrentHeader(), std::move(aggregator_params), true /*final*/, settings[Setting::group_by_use_nulls]);
|
||||
query_plan.addStep(std::move(rollup_step));
|
||||
}
|
||||
else if (query_node.isGroupByWithCube())
|
||||
{
|
||||
auto cube_step = std::make_unique<CubeStep>(
|
||||
query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings[Setting::group_by_use_nulls]);
|
||||
query_plan.getCurrentHeader(), std::move(aggregator_params), true /*final*/, settings[Setting::group_by_use_nulls]);
|
||||
query_plan.addStep(std::move(cube_step));
|
||||
}
|
||||
}
|
||||
@ -687,7 +687,7 @@ void addDistinctStep(QueryPlan & query_plan,
|
||||
SizeLimits limits(settings[Setting::max_rows_in_distinct], settings[Setting::max_bytes_in_distinct], settings[Setting::distinct_overflow_mode]);
|
||||
|
||||
auto distinct_step = std::make_unique<DistinctStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
limits,
|
||||
limit_hint_for_distinct,
|
||||
column_names,
|
||||
@ -706,7 +706,7 @@ void addSortingStep(QueryPlan & query_plan,
|
||||
SortingStep::Settings sort_settings(*query_context);
|
||||
|
||||
auto sorting_step = std::make_unique<SortingStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
sort_description,
|
||||
query_analysis_result.partial_sorting_limit,
|
||||
sort_settings);
|
||||
@ -725,7 +725,7 @@ void addMergeSortingStep(QueryPlan & query_plan,
|
||||
const auto & sort_description = query_analysis_result.sort_description;
|
||||
|
||||
auto merging_sorted = std::make_unique<SortingStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
sort_description,
|
||||
settings[Setting::max_block_size],
|
||||
query_analysis_result.partial_sorting_limit,
|
||||
@ -761,7 +761,7 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan,
|
||||
if (query_node.hasInterpolate())
|
||||
{
|
||||
ActionsDAG interpolate_actions_dag;
|
||||
auto query_plan_columns = query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
|
||||
auto query_plan_columns = query_plan.getCurrentHeader().getColumnsWithTypeAndName();
|
||||
for (auto & query_plan_column : query_plan_columns)
|
||||
{
|
||||
/// INTERPOLATE actions dag input columns must be non constant
|
||||
@ -846,7 +846,7 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan,
|
||||
const auto & query_context = planner_context->getQueryContext();
|
||||
const Settings & settings = query_context->getSettingsRef();
|
||||
auto filling_step = std::make_unique<FillingStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
sort_description,
|
||||
std::move(fill_description),
|
||||
interpolate_description,
|
||||
@ -868,7 +868,7 @@ void addLimitByStep(QueryPlan & query_plan,
|
||||
limit_by_offset = query_node.getLimitByOffset()->as<ConstantNode &>().getValue().safeGet<UInt64>();
|
||||
}
|
||||
|
||||
auto limit_by_step = std::make_unique<LimitByStep>(query_plan.getCurrentDataStream(),
|
||||
auto limit_by_step = std::make_unique<LimitByStep>(query_plan.getCurrentHeader(),
|
||||
limit_by_limit,
|
||||
limit_by_offset,
|
||||
limit_by_analysis_result.limit_by_column_names);
|
||||
@ -896,7 +896,7 @@ void addPreliminaryLimitStep(QueryPlan & query_plan,
|
||||
const Settings & settings = query_context->getSettingsRef();
|
||||
|
||||
auto limit
|
||||
= std::make_unique<LimitStep>(query_plan.getCurrentDataStream(), limit_length, limit_offset, settings[Setting::exact_rows_before_limit]);
|
||||
= std::make_unique<LimitStep>(query_plan.getCurrentHeader(), limit_length, limit_offset, settings[Setting::exact_rows_before_limit]);
|
||||
limit->setStepDescription(do_not_skip_offset ? "preliminary LIMIT (with OFFSET)" : "preliminary LIMIT (without OFFSET)");
|
||||
query_plan.addStep(std::move(limit));
|
||||
}
|
||||
@ -1023,7 +1023,7 @@ void addWindowSteps(QueryPlan & query_plan,
|
||||
SortingStep::Settings sort_settings(*query_context);
|
||||
|
||||
auto sorting_step = std::make_unique<SortingStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
window_description.full_sort_description,
|
||||
window_description.partition_by,
|
||||
0 /*limit*/,
|
||||
@ -1038,7 +1038,7 @@ void addWindowSteps(QueryPlan & query_plan,
|
||||
= settings[Setting::query_plan_enable_multithreading_after_window_functions] && ((i + 1) == window_descriptions_size);
|
||||
|
||||
auto window_step
|
||||
= std::make_unique<WindowStep>(query_plan.getCurrentDataStream(), window_description, window_description.window_functions, streams_fan_out);
|
||||
= std::make_unique<WindowStep>(query_plan.getCurrentHeader(), window_description, window_description.window_functions, streams_fan_out);
|
||||
window_step->setStepDescription("Window step for window '" + window_description.window_name + "'");
|
||||
query_plan.addStep(std::move(window_step));
|
||||
}
|
||||
@ -1084,7 +1084,7 @@ void addLimitStep(QueryPlan & query_plan,
|
||||
UInt64 limit_offset = query_analysis_result.limit_offset;
|
||||
|
||||
auto limit = std::make_unique<LimitStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
limit_length,
|
||||
limit_offset,
|
||||
always_read_till_end,
|
||||
@ -1103,7 +1103,7 @@ void addExtremesStepIfNeeded(QueryPlan & query_plan, const PlannerContextPtr & p
|
||||
if (!query_context->getSettingsRef()[Setting::extremes])
|
||||
return;
|
||||
|
||||
auto extremes_step = std::make_unique<ExtremesStep>(query_plan.getCurrentDataStream());
|
||||
auto extremes_step = std::make_unique<ExtremesStep>(query_plan.getCurrentHeader());
|
||||
query_plan.addStep(std::move(extremes_step));
|
||||
}
|
||||
|
||||
@ -1112,7 +1112,7 @@ void addOffsetStep(QueryPlan & query_plan, const QueryAnalysisResult & query_ana
|
||||
/// If there is not a LIMIT but an offset
|
||||
if (!query_analysis_result.limit_length && query_analysis_result.limit_offset)
|
||||
{
|
||||
auto offsets_step = std::make_unique<OffsetStep>(query_plan.getCurrentDataStream(), query_analysis_result.limit_offset);
|
||||
auto offsets_step = std::make_unique<OffsetStep>(query_plan.getCurrentHeader(), query_analysis_result.limit_offset);
|
||||
query_plan.addStep(std::move(offsets_step));
|
||||
}
|
||||
}
|
||||
@ -1150,7 +1150,7 @@ void addBuildSubqueriesForSetsStepIfNeeded(
|
||||
if (!subqueries.empty())
|
||||
{
|
||||
auto step = std::make_unique<DelayedCreatingSetsStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
std::move(subqueries),
|
||||
planner_context->getQueryContext());
|
||||
|
||||
@ -1190,7 +1190,7 @@ void addAdditionalFilterStepIfNeeded(QueryPlan & query_plan,
|
||||
if (!query_plan.isInitialized())
|
||||
return;
|
||||
|
||||
auto filter_step = std::make_unique<FilterStep>(query_plan.getCurrentDataStream(),
|
||||
auto filter_step = std::make_unique<FilterStep>(query_plan.getCurrentHeader(),
|
||||
std::move(filter_info.actions),
|
||||
filter_info.column_name,
|
||||
filter_info.do_remove_column);
|
||||
@ -1329,31 +1329,27 @@ void Planner::buildPlanForUnionNode()
|
||||
const auto & mapping = query_planner.getQueryNodeToPlanStepMapping();
|
||||
query_node_to_plan_step_mapping.insert(mapping.begin(), mapping.end());
|
||||
auto query_node_plan = std::make_unique<QueryPlan>(std::move(query_planner).extractQueryPlan());
|
||||
query_plans_headers.push_back(query_node_plan->getCurrentDataStream().header);
|
||||
query_plans_headers.push_back(query_node_plan->getCurrentHeader());
|
||||
query_plans.push_back(std::move(query_node_plan));
|
||||
}
|
||||
|
||||
Block union_common_header = buildCommonHeaderForUnion(query_plans_headers, union_mode);
|
||||
DataStreams query_plans_streams;
|
||||
query_plans_streams.reserve(query_plans.size());
|
||||
|
||||
for (auto & query_node_plan : query_plans)
|
||||
for (size_t i = 0; i < queries_size; ++i)
|
||||
{
|
||||
if (blocksHaveEqualStructure(query_node_plan->getCurrentDataStream().header, union_common_header))
|
||||
{
|
||||
query_plans_streams.push_back(query_node_plan->getCurrentDataStream());
|
||||
auto & query_node_plan = query_plans[i];
|
||||
if (blocksHaveEqualStructure(query_node_plan->getCurrentHeader(), union_common_header))
|
||||
continue;
|
||||
}
|
||||
|
||||
auto actions_dag = ActionsDAG::makeConvertingActions(
|
||||
query_node_plan->getCurrentDataStream().header.getColumnsWithTypeAndName(),
|
||||
query_node_plan->getCurrentHeader().getColumnsWithTypeAndName(),
|
||||
union_common_header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Position);
|
||||
auto converting_step = std::make_unique<ExpressionStep>(query_node_plan->getCurrentDataStream(), std::move(actions_dag));
|
||||
auto converting_step = std::make_unique<ExpressionStep>(query_node_plan->getCurrentHeader(), std::move(actions_dag));
|
||||
converting_step->setStepDescription("Conversion before UNION");
|
||||
query_node_plan->addStep(std::move(converting_step));
|
||||
|
||||
query_plans_streams.push_back(query_node_plan->getCurrentDataStream());
|
||||
query_plans_headers[i] = query_node_plan->getCurrentHeader();
|
||||
}
|
||||
|
||||
const auto & query_context = planner_context->getQueryContext();
|
||||
@ -1365,7 +1361,7 @@ void Planner::buildPlanForUnionNode()
|
||||
|
||||
if (union_mode == SelectUnionMode::UNION_ALL || union_mode == SelectUnionMode::UNION_DISTINCT)
|
||||
{
|
||||
auto union_step = std::make_unique<UnionStep>(std::move(query_plans_streams), max_threads);
|
||||
auto union_step = std::make_unique<UnionStep>(std::move(query_plans_headers), max_threads);
|
||||
query_plan.unitePlans(std::move(union_step), std::move(query_plans));
|
||||
}
|
||||
else if (union_mode == SelectUnionMode::INTERSECT_ALL || union_mode == SelectUnionMode::INTERSECT_DISTINCT
|
||||
@ -1383,7 +1379,7 @@ void Planner::buildPlanForUnionNode()
|
||||
intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT_DISTINCT;
|
||||
|
||||
auto union_step
|
||||
= std::make_unique<IntersectOrExceptStep>(std::move(query_plans_streams), intersect_or_except_operator, max_threads);
|
||||
= std::make_unique<IntersectOrExceptStep>(std::move(query_plans_headers), intersect_or_except_operator, max_threads);
|
||||
query_plan.unitePlans(std::move(union_step), std::move(query_plans));
|
||||
}
|
||||
|
||||
@ -1393,10 +1389,10 @@ void Planner::buildPlanForUnionNode()
|
||||
SizeLimits limits(settings[Setting::max_rows_in_distinct], settings[Setting::max_bytes_in_distinct], settings[Setting::distinct_overflow_mode]);
|
||||
|
||||
auto distinct_step = std::make_unique<DistinctStep>(
|
||||
query_plan.getCurrentDataStream(),
|
||||
query_plan.getCurrentHeader(),
|
||||
limits,
|
||||
0 /*limit hint*/,
|
||||
query_plan.getCurrentDataStream().header.getNames(),
|
||||
query_plan.getCurrentHeader().getNames(),
|
||||
false /*pre distinct*/);
|
||||
query_plan.addStep(std::move(distinct_step));
|
||||
}
|
||||
@ -1558,7 +1554,7 @@ void Planner::buildPlanForQueryNode()
|
||||
PlannerQueryProcessingInfo query_processing_info(from_stage, select_query_options.to_stage);
|
||||
QueryAnalysisResult query_analysis_result(query_tree, query_processing_info, planner_context);
|
||||
auto expression_analysis_result = buildExpressionAnalysisResult(query_tree,
|
||||
query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
|
||||
query_plan.getCurrentHeader().getColumnsWithTypeAndName(),
|
||||
planner_context,
|
||||
query_processing_info);
|
||||
|
||||
|
@ -637,9 +637,9 @@ UInt64 mainQueryNodeBlockSizeByLimit(const SelectQueryInfo & select_query_info)
|
||||
}
|
||||
|
||||
std::unique_ptr<ExpressionStep> createComputeAliasColumnsStep(
|
||||
std::unordered_map<std::string, ActionsDAG> & alias_column_expressions, const DataStream & current_data_stream)
|
||||
std::unordered_map<std::string, ActionsDAG> & alias_column_expressions, const Header & current_header)
|
||||
{
|
||||
ActionsDAG merged_alias_columns_actions_dag(current_data_stream.header.getColumnsWithTypeAndName());
|
||||
ActionsDAG merged_alias_columns_actions_dag(current_header.getColumnsWithTypeAndName());
|
||||
ActionsDAG::NodeRawConstPtrs action_dag_outputs = merged_alias_columns_actions_dag.getInputs();
|
||||
|
||||
for (auto & [column_name, alias_column_actions_dag] : alias_column_expressions)
|
||||
@ -653,7 +653,7 @@ std::unique_ptr<ExpressionStep> createComputeAliasColumnsStep(
|
||||
merged_alias_columns_actions_dag.addOrReplaceInOutputs(*output_node);
|
||||
merged_alias_columns_actions_dag.removeUnusedActions(false);
|
||||
|
||||
auto alias_column_step = std::make_unique<ExpressionStep>(current_data_stream, std::move(merged_alias_columns_actions_dag));
|
||||
auto alias_column_step = std::make_unique<ExpressionStep>(current_header, std::move(merged_alias_columns_actions_dag));
|
||||
alias_column_step->setStepDescription("Compute alias columns");
|
||||
return alias_column_step;
|
||||
}
|
||||
@ -1065,7 +1065,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
|
||||
auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions();
|
||||
if (!alias_column_expressions.empty() && query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns)
|
||||
{
|
||||
auto alias_column_step = createComputeAliasColumnsStep(alias_column_expressions, query_plan.getCurrentDataStream());
|
||||
auto alias_column_step = createComputeAliasColumnsStep(alias_column_expressions, query_plan.getCurrentHeader());
|
||||
query_plan.addStep(std::move(alias_column_step));
|
||||
}
|
||||
|
||||
@ -1074,7 +1074,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
|
||||
if (query_plan.isInitialized() &&
|
||||
from_stage == QueryProcessingStage::FetchColumns)
|
||||
{
|
||||
auto filter_step = std::make_unique<FilterStep>(query_plan.getCurrentDataStream(),
|
||||
auto filter_step = std::make_unique<FilterStep>(query_plan.getCurrentHeader(),
|
||||
std::move(filter_info.actions),
|
||||
filter_info.column_name,
|
||||
filter_info.do_remove_column);
|
||||
@ -1154,7 +1154,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
|
||||
auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions();
|
||||
if (!alias_column_expressions.empty() && query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns)
|
||||
{
|
||||
auto alias_column_step = createComputeAliasColumnsStep(alias_column_expressions, query_plan.getCurrentDataStream());
|
||||
auto alias_column_step = createComputeAliasColumnsStep(alias_column_expressions, query_plan.getCurrentHeader());
|
||||
query_plan.addStep(std::move(alias_column_step));
|
||||
}
|
||||
}
|
||||
@ -1166,7 +1166,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
|
||||
|
||||
if (from_stage == QueryProcessingStage::FetchColumns)
|
||||
{
|
||||
ActionsDAG rename_actions_dag(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName());
|
||||
ActionsDAG rename_actions_dag(query_plan.getCurrentHeader().getColumnsWithTypeAndName());
|
||||
ActionsDAG::NodeRawConstPtrs updated_actions_dag_outputs;
|
||||
|
||||
for (auto & output_node : rename_actions_dag.getOutputs())
|
||||
@ -1180,7 +1180,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
|
||||
|
||||
rename_actions_dag.getOutputs() = std::move(updated_actions_dag_outputs);
|
||||
|
||||
auto rename_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(rename_actions_dag));
|
||||
auto rename_step = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(rename_actions_dag));
|
||||
rename_step->setStepDescription("Change column names to column identifiers");
|
||||
query_plan.addStep(std::move(rename_step));
|
||||
}
|
||||
@ -1192,18 +1192,18 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
|
||||
select_query_info.planner_context);
|
||||
planner.buildQueryPlanIfNeeded();
|
||||
|
||||
auto expected_header = planner.getQueryPlan().getCurrentDataStream().header;
|
||||
auto expected_header = planner.getQueryPlan().getCurrentHeader();
|
||||
|
||||
if (!blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, expected_header))
|
||||
if (!blocksHaveEqualStructure(query_plan.getCurrentHeader(), expected_header))
|
||||
{
|
||||
materializeBlockInplace(expected_header);
|
||||
|
||||
auto rename_actions_dag = ActionsDAG::makeConvertingActions(
|
||||
query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
|
||||
query_plan.getCurrentHeader().getColumnsWithTypeAndName(),
|
||||
expected_header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Position,
|
||||
true /*ignore_constant_values*/);
|
||||
auto rename_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(rename_actions_dag));
|
||||
auto rename_step = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(rename_actions_dag));
|
||||
std::string step_description = table_expression_data.isRemote() ? "Change remote column names to local column names" : "Change column names";
|
||||
rename_step->setStepDescription(std::move(step_description));
|
||||
query_plan.addStep(std::move(rename_step));
|
||||
@ -1220,7 +1220,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
|
||||
|
||||
void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextPtr & planner_context, const FunctionOverloadResolverPtr & to_nullable_function)
|
||||
{
|
||||
ActionsDAG cast_actions_dag(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName());
|
||||
ActionsDAG cast_actions_dag(plan_to_add_cast.getCurrentHeader().getColumnsWithTypeAndName());
|
||||
|
||||
for (auto & output_node : cast_actions_dag.getOutputs())
|
||||
{
|
||||
@ -1235,8 +1235,8 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP
|
||||
}
|
||||
}
|
||||
|
||||
cast_actions_dag.appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header);
|
||||
auto cast_join_columns_step = std::make_unique<ExpressionStep>(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag));
|
||||
cast_actions_dag.appendInputsForUnusedColumns(plan_to_add_cast.getCurrentHeader());
|
||||
auto cast_join_columns_step = std::make_unique<ExpressionStep>(plan_to_add_cast.getCurrentHeader(), std::move(cast_actions_dag));
|
||||
cast_join_columns_step->setStepDescription("Cast JOIN columns to Nullable");
|
||||
plan_to_add_cast.addStep(std::move(cast_join_columns_step));
|
||||
}
|
||||
@ -1255,7 +1255,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
QueryProcessingStage::toString(left_join_tree_query_plan.from_stage));
|
||||
|
||||
auto left_plan = std::move(left_join_tree_query_plan.query_plan);
|
||||
auto left_plan_output_columns = left_plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
|
||||
auto left_plan_output_columns = left_plan.getCurrentHeader().getColumnsWithTypeAndName();
|
||||
if (right_join_tree_query_plan.from_stage != QueryProcessingStage::FetchColumns)
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"JOIN {} right table expression expected to process query to fetch columns stage. Actual {}",
|
||||
@ -1263,7 +1263,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
QueryProcessingStage::toString(right_join_tree_query_plan.from_stage));
|
||||
|
||||
auto right_plan = std::move(right_join_tree_query_plan.query_plan);
|
||||
auto right_plan_output_columns = right_plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
|
||||
auto right_plan_output_columns = right_plan.getCurrentHeader().getColumnsWithTypeAndName();
|
||||
|
||||
JoinClausesAndActions join_clauses_and_actions;
|
||||
JoinKind join_kind = join_node.getKind();
|
||||
@ -1281,14 +1281,14 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
join_table_expression,
|
||||
planner_context);
|
||||
|
||||
join_clauses_and_actions.left_join_expressions_actions.appendInputsForUnusedColumns(left_plan.getCurrentDataStream().header);
|
||||
auto left_join_expressions_actions_step = std::make_unique<ExpressionStep>(left_plan.getCurrentDataStream(), std::move(join_clauses_and_actions.left_join_expressions_actions));
|
||||
join_clauses_and_actions.left_join_expressions_actions.appendInputsForUnusedColumns(left_plan.getCurrentHeader());
|
||||
auto left_join_expressions_actions_step = std::make_unique<ExpressionStep>(left_plan.getCurrentHeader(), std::move(join_clauses_and_actions.left_join_expressions_actions));
|
||||
left_join_expressions_actions_step->setStepDescription("JOIN actions");
|
||||
appendSetsFromActionsDAG(left_join_expressions_actions_step->getExpression(), left_join_tree_query_plan.useful_sets);
|
||||
left_plan.addStep(std::move(left_join_expressions_actions_step));
|
||||
|
||||
join_clauses_and_actions.right_join_expressions_actions.appendInputsForUnusedColumns(right_plan.getCurrentDataStream().header);
|
||||
auto right_join_expressions_actions_step = std::make_unique<ExpressionStep>(right_plan.getCurrentDataStream(), std::move(join_clauses_and_actions.right_join_expressions_actions));
|
||||
join_clauses_and_actions.right_join_expressions_actions.appendInputsForUnusedColumns(right_plan.getCurrentHeader());
|
||||
auto right_join_expressions_actions_step = std::make_unique<ExpressionStep>(right_plan.getCurrentHeader(), std::move(join_clauses_and_actions.right_join_expressions_actions));
|
||||
right_join_expressions_actions_step->setStepDescription("JOIN actions");
|
||||
appendSetsFromActionsDAG(right_join_expressions_actions_step->getExpression(), right_join_tree_query_plan.useful_sets);
|
||||
right_plan.addStep(std::move(right_join_expressions_actions_step));
|
||||
@ -1328,7 +1328,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
|
||||
auto join_cast_plan_output_nodes = [&](QueryPlan & plan_to_add_cast, std::unordered_map<std::string, DataTypePtr> & plan_column_name_to_cast_type)
|
||||
{
|
||||
ActionsDAG cast_actions_dag(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName());
|
||||
ActionsDAG cast_actions_dag(plan_to_add_cast.getCurrentHeader().getColumnsWithTypeAndName());
|
||||
|
||||
for (auto & output_node : cast_actions_dag.getOutputs())
|
||||
{
|
||||
@ -1340,9 +1340,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
output_node = &cast_actions_dag.addCast(*output_node, cast_type, output_node->result_name);
|
||||
}
|
||||
|
||||
cast_actions_dag.appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header);
|
||||
cast_actions_dag.appendInputsForUnusedColumns(plan_to_add_cast.getCurrentHeader());
|
||||
auto cast_join_columns_step
|
||||
= std::make_unique<ExpressionStep>(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag));
|
||||
= std::make_unique<ExpressionStep>(plan_to_add_cast.getCurrentHeader(), std::move(cast_actions_dag));
|
||||
cast_join_columns_step->setStepDescription("Cast JOIN USING columns");
|
||||
plan_to_add_cast.addStep(std::move(cast_join_columns_step));
|
||||
};
|
||||
@ -1512,11 +1512,11 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
}
|
||||
}
|
||||
|
||||
const Block & left_header = left_plan.getCurrentDataStream().header;
|
||||
const Block & left_header = left_plan.getCurrentHeader();
|
||||
auto left_table_names = left_header.getNames();
|
||||
NameSet left_table_names_set(left_table_names.begin(), left_table_names.end());
|
||||
|
||||
auto columns_from_joined_table = right_plan.getCurrentDataStream().header.getNamesAndTypesList();
|
||||
auto columns_from_joined_table = right_plan.getCurrentHeader().getNamesAndTypesList();
|
||||
table_join->setColumnsFromJoinedTable(columns_from_joined_table, left_table_names_set, "");
|
||||
|
||||
for (auto & column_from_joined_table : columns_from_joined_table)
|
||||
@ -1527,7 +1527,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
table_join->addJoinedColumn(column_from_joined_table);
|
||||
}
|
||||
|
||||
const Block & right_header = right_plan.getCurrentDataStream().header;
|
||||
const Block & right_header = right_plan.getCurrentHeader();
|
||||
auto join_algorithm = chooseJoinAlgorithm(table_join, join_node.getRightTableExpression(), left_header, right_header, planner_context);
|
||||
|
||||
auto result_plan = QueryPlan();
|
||||
@ -1536,7 +1536,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
if (is_filled_join)
|
||||
{
|
||||
auto filled_join_step
|
||||
= std::make_unique<FilledJoinStep>(left_plan.getCurrentDataStream(), join_algorithm, settings[Setting::max_block_size]);
|
||||
= std::make_unique<FilledJoinStep>(left_plan.getCurrentHeader(), join_algorithm, settings[Setting::max_block_size]);
|
||||
|
||||
filled_join_step->setStepDescription("Filled JOIN");
|
||||
left_plan.addStep(std::move(filled_join_step));
|
||||
@ -1555,7 +1555,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
SortingStep::Settings sort_settings(*query_context);
|
||||
|
||||
auto sorting_step = std::make_unique<SortingStep>(
|
||||
plan.getCurrentDataStream(),
|
||||
plan.getCurrentHeader(),
|
||||
std::move(sort_description),
|
||||
0 /*limit*/,
|
||||
sort_settings);
|
||||
@ -1567,7 +1567,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
auto add_create_set = [&settings, crosswise_connection](QueryPlan & plan, const Names & key_names, JoinTableSide join_table_side)
|
||||
{
|
||||
auto creating_set_step = std::make_unique<CreateSetAndFilterOnTheFlyStep>(
|
||||
plan.getCurrentDataStream(), key_names, settings[Setting::max_rows_in_set_to_optimize_join], crosswise_connection, join_table_side);
|
||||
plan.getCurrentHeader(), key_names, settings[Setting::max_rows_in_set_to_optimize_join], crosswise_connection, join_table_side);
|
||||
creating_set_step->setStepDescription(fmt::format("Create set and filter {} joined stream", join_table_side));
|
||||
|
||||
auto * step_raw_ptr = creating_set_step.get();
|
||||
@ -1598,8 +1598,8 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
/// Sorting on a stream with const keys can start returning rows immediately and the pipeline may get stuck.
/// Note: it also doesn't work with the read-in-order optimization.
/// No checks here because read-in-order is not applied if we have `CreateSetAndFilterOnTheFlyStep` in the pipeline between the reading and sorting steps.
|
||||
bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left)
|
||||
&& has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right);
|
||||
bool has_non_const_keys = has_non_const(left_plan.getCurrentHeader(), join_clause.key_names_left)
|
||||
&& has_non_const(right_plan.getCurrentHeader(), join_clause.key_names_right);
|
||||
|
||||
if (settings[Setting::max_rows_in_set_to_optimize_join] > 0 && join_type_allows_filtering && has_non_const_keys)
|
||||
{
|
||||
@ -1619,8 +1619,8 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
|
||||
auto join_pipeline_type = join_algorithm->pipelineType();
|
||||
auto join_step = std::make_unique<JoinStep>(
|
||||
left_plan.getCurrentDataStream(),
|
||||
right_plan.getCurrentDataStream(),
|
||||
left_plan.getCurrentHeader(),
|
||||
right_plan.getCurrentHeader(),
|
||||
std::move(join_algorithm),
|
||||
settings[Setting::max_block_size],
|
||||
settings[Setting::max_threads],
|
||||
@ -1635,7 +1635,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
result_plan.unitePlans(std::move(join_step), {std::move(plans)});
|
||||
}
|
||||
|
||||
ActionsDAG drop_unused_columns_after_join_actions_dag(result_plan.getCurrentDataStream().header.getColumnsWithTypeAndName());
|
||||
ActionsDAG drop_unused_columns_after_join_actions_dag(result_plan.getCurrentHeader().getColumnsWithTypeAndName());
|
||||
ActionsDAG::NodeRawConstPtrs drop_unused_columns_after_join_actions_dag_updated_outputs;
|
||||
std::unordered_set<std::string_view> drop_unused_columns_after_join_actions_dag_updated_outputs_names;
|
||||
std::optional<size_t> first_skipped_column_node_index;
|
||||
@ -1672,7 +1672,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
|
||||
drop_unused_columns_after_join_actions_dag_outputs = std::move(drop_unused_columns_after_join_actions_dag_updated_outputs);
|
||||
|
||||
auto drop_unused_columns_after_join_transform_step = std::make_unique<ExpressionStep>(result_plan.getCurrentDataStream(), std::move(drop_unused_columns_after_join_actions_dag));
|
||||
auto drop_unused_columns_after_join_transform_step = std::make_unique<ExpressionStep>(result_plan.getCurrentHeader(), std::move(drop_unused_columns_after_join_actions_dag));
|
||||
drop_unused_columns_after_join_transform_step->setStepDescription("DROP unused columns after JOIN");
|
||||
result_plan.addStep(std::move(drop_unused_columns_after_join_transform_step));
|
||||
|
||||
@ -1710,7 +1710,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_
|
||||
QueryProcessingStage::toString(join_tree_query_plan.from_stage));
|
||||
|
||||
auto plan = std::move(join_tree_query_plan.query_plan);
|
||||
auto plan_output_columns = plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
|
||||
auto plan_output_columns = plan.getCurrentHeader().getColumnsWithTypeAndName();
|
||||
|
||||
ActionsDAG array_join_action_dag(plan_output_columns);
|
||||
PlannerActionsVisitor actions_visitor(planner_context);
|
||||
@ -1734,14 +1734,14 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_
|
||||
}
|
||||
}
|
||||
|
||||
array_join_action_dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header);
|
||||
array_join_action_dag.appendInputsForUnusedColumns(plan.getCurrentHeader());
|
||||
|
||||
auto array_join_actions = std::make_unique<ExpressionStep>(plan.getCurrentDataStream(), std::move(array_join_action_dag));
|
||||
auto array_join_actions = std::make_unique<ExpressionStep>(plan.getCurrentHeader(), std::move(array_join_action_dag));
|
||||
array_join_actions->setStepDescription("ARRAY JOIN actions");
|
||||
appendSetsFromActionsDAG(array_join_actions->getExpression(), join_tree_query_plan.useful_sets);
|
||||
plan.addStep(std::move(array_join_actions));
|
||||
|
||||
ActionsDAG drop_unused_columns_before_array_join_actions_dag(plan.getCurrentDataStream().header.getColumnsWithTypeAndName());
|
||||
ActionsDAG drop_unused_columns_before_array_join_actions_dag(plan.getCurrentHeader().getColumnsWithTypeAndName());
|
||||
ActionsDAG::NodeRawConstPtrs drop_unused_columns_before_array_join_actions_dag_updated_outputs;
|
||||
std::unordered_set<std::string_view> drop_unused_columns_before_array_join_actions_dag_updated_outputs_names;
|
||||
|
||||
@ -1765,14 +1765,14 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_
|
||||
|
||||
drop_unused_columns_before_array_join_actions_dag_outputs = std::move(drop_unused_columns_before_array_join_actions_dag_updated_outputs);
|
||||
|
||||
auto drop_unused_columns_before_array_join_transform_step = std::make_unique<ExpressionStep>(plan.getCurrentDataStream(),
|
||||
auto drop_unused_columns_before_array_join_transform_step = std::make_unique<ExpressionStep>(plan.getCurrentHeader(),
|
||||
std::move(drop_unused_columns_before_array_join_actions_dag));
|
||||
drop_unused_columns_before_array_join_transform_step->setStepDescription("DROP unused columns before ARRAY JOIN");
|
||||
plan.addStep(std::move(drop_unused_columns_before_array_join_transform_step));
|
||||
|
||||
const auto & settings = planner_context->getQueryContext()->getSettingsRef();
|
||||
auto array_join_step = std::make_unique<ArrayJoinStep>(
|
||||
plan.getCurrentDataStream(),
|
||||
plan.getCurrentHeader(),
|
||||
ArrayJoin{std::move(array_join_column_names), array_join_node.isLeft()},
|
||||
settings[Setting::enable_unaligned_array_join],
|
||||
settings[Setting::max_block_size]);
|
||||
|
@ -446,7 +446,7 @@ JoinTreeQueryPlan buildQueryPlanForParallelReplicas(
|
||||
/// header is the header returned by the follower.
/// They are different because tables will have different aliases (e.g. _table1 or _table5).
/// Here we just rename columns by position, in the hope that the types match.
|
||||
auto step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(converting));
|
||||
auto step = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(converting));
|
||||
step->setStepDescription("Convert distributed names");
|
||||
query_plan.addStep(std::move(step));
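Throughout the planner hunks above, `getCurrentDataStream().header` is replaced with `getCurrentHeader()`, and the plan steps now receive the header directly instead of the data stream. A minimal sketch of the assumed relationship between the two spellings (illustrative only; the real QueryPlan accessor may not be implemented this way):

// Assumption: getCurrentHeader() is a thin accessor over the header of the
// plan's current output stream, so both call sites describe the same block layout.
const Block & QueryPlan::getCurrentHeader() const
{
    return getCurrentDataStream().header;
}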
|
||||
|
||||
|
@ -274,7 +274,8 @@ size_t IRowInputFormat::countRows(size_t)
|
||||
|
||||
void IRowInputFormat::setSerializationHints(const SerializationInfoByName & hints)
|
||||
{
|
||||
serializations = getPort().getHeader().getSerializations(hints);
|
||||
if (supportsCustomSerializations())
|
||||
serializations = getPort().getHeader().getSerializations(hints);
|
||||
}
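With this change, serialization hints are only applied by formats that explicitly opt in. A minimal sketch of the opt-in, using a hypothetical format class (the class name is illustrative and not part of the patch):

class MyRowInputFormat : public IRowInputFormat   /// hypothetical example format
{
protected:
    /// Opting in means setSerializationHints() above rebuilds the serializations from the provided hints.
    bool supportsCustomSerializations() const override { return true; }

    /// ... readRow() and the other required overrides are omitted from this sketch.
};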
|
||||
|
||||
|
||||
|
@ -59,6 +59,7 @@ protected:
|
||||
/// `max_block_size` can be ignored.
|
||||
virtual size_t countRows(size_t max_block_size);
|
||||
virtual bool supportsCountRows() const { return false; }
|
||||
virtual bool supportsCustomSerializations() const { return false; }
|
||||
|
||||
virtual void readPrefix() {} /// delimiter before begin of result
|
||||
virtual void readSuffix() {} /// delimiter after end of result
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <arrow/type_fwd.h>
|
||||
#include <boost/algorithm/string/case_conv.hpp>
|
||||
#include <Common/Exception.h>
|
||||
#include <parquet/metadata.h>
|
||||
|
||||
|
||||
namespace arrow
|
||||
@ -65,11 +66,22 @@ public:
|
||||
return result;
|
||||
}
|
||||
|
||||
// For a parquet schema {x: {i: int, j: int}}, this should be populated as follows
|
||||
// clickhouse_index = 0, parquet_indexes = {0, 1}
|
||||
struct ClickHouseIndexToParquetIndex
|
||||
{
|
||||
std::size_t clickhouse_index;
|
||||
std::vector<int> parquet_indexes;
|
||||
};
|
||||
|
||||
/// Only collect the required fields' indices. E.g. when reading just one field of a struct,
/// we don't need to collect all the indices in that struct.
|
||||
std::vector<int> findRequiredIndices(const Block & header, const arrow::Schema & schema)
|
||||
std::vector<ClickHouseIndexToParquetIndex> findRequiredIndices(
|
||||
const Block & header,
|
||||
const arrow::Schema & schema,
|
||||
const parquet::FileMetaData & file)
|
||||
{
|
||||
std::vector<int> required_indices;
|
||||
std::vector<ClickHouseIndexToParquetIndex> required_indices;
|
||||
std::unordered_set<int> added_indices;
|
||||
/// Flatten all named fields' index information into a map.
|
||||
auto fields_indices = calculateFieldIndices(schema);
|
||||
@ -79,7 +91,7 @@ public:
|
||||
std::string col_name = named_col.name;
|
||||
if (ignore_case)
|
||||
boost::to_lower(col_name);
|
||||
findRequiredIndices(col_name, named_col.type, fields_indices, added_indices, required_indices);
|
||||
findRequiredIndices(col_name, i, named_col.type, fields_indices, added_indices, required_indices, file);
|
||||
}
|
||||
return required_indices;
|
||||
}
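For callers that still need the flat list of parquet column indices (e.g. to configure which columns the reader materializes), the mapping returned above can be flattened; a small sketch, assuming a hypothetical helper name:

/// Hypothetical helper: collapse the per-ClickHouse-column mapping back into
/// the plain list of parquet leaf indices used for column selection.
static std::vector<int> flattenParquetIndexes(const std::vector<ClickHouseIndexToParquetIndex> & mapping)
{
    std::vector<int> indexes;
    for (const auto & entry : mapping)
        indexes.insert(indexes.end(), entry.parquet_indexes.begin(), entry.parquet_indexes.end());
    return indexes;
}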
|
||||
@ -169,10 +181,12 @@ private:
|
||||
|
||||
void findRequiredIndices(
|
||||
const String & name,
|
||||
std::size_t header_index,
|
||||
DataTypePtr data_type,
|
||||
const std::unordered_map<std::string, std::pair<int, int>> & field_indices,
|
||||
std::unordered_set<int> & added_indices,
|
||||
std::vector<int> & required_indices)
|
||||
std::vector<ClickHouseIndexToParquetIndex> & required_indices,
|
||||
const parquet::FileMetaData & file)
|
||||
{
|
||||
auto nested_type = removeNullable(data_type);
|
||||
if (const DB::DataTypeTuple * type_tuple = typeid_cast<const DB::DataTypeTuple *>(nested_type.get()))
|
||||
@ -187,20 +201,20 @@ private:
|
||||
if (ignore_case)
|
||||
boost::to_lower(field_name);
|
||||
const auto & field_type = field_types[i];
|
||||
findRequiredIndices(Nested::concatenateName(name, field_name), field_type, field_indices, added_indices, required_indices);
|
||||
findRequiredIndices(Nested::concatenateName(name, field_name), header_index, field_type, field_indices, added_indices, required_indices, file);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (const auto * type_array = typeid_cast<const DB::DataTypeArray *>(nested_type.get()))
|
||||
{
|
||||
findRequiredIndices(name, type_array->getNestedType(), field_indices, added_indices, required_indices);
|
||||
findRequiredIndices(name, header_index, type_array->getNestedType(), field_indices, added_indices, required_indices, file);
|
||||
return;
|
||||
}
|
||||
else if (const auto * type_map = typeid_cast<const DB::DataTypeMap *>(nested_type.get()))
|
||||
{
|
||||
findRequiredIndices(name, type_map->getKeyType(), field_indices, added_indices, required_indices);
|
||||
findRequiredIndices(name, type_map->getValueType(), field_indices, added_indices, required_indices);
|
||||
findRequiredIndices(name, header_index, type_map->getKeyType(), field_indices, added_indices, required_indices, file);
|
||||
findRequiredIndices(name, header_index, type_map->getValueType(), field_indices, added_indices, required_indices, file);
|
||||
return;
|
||||
}
|
||||
auto it = field_indices.find(name);
|
||||
@ -211,14 +225,18 @@ private:
|
||||
}
|
||||
else
|
||||
{
|
||||
ClickHouseIndexToParquetIndex index_mapping;
|
||||
index_mapping.clickhouse_index = header_index;
|
||||
for (int j = 0; j < it->second.second; ++j)
|
||||
{
|
||||
auto index = it->second.first + j;
|
||||
if (added_indices.insert(index).second)
|
||||
{
|
||||
required_indices.emplace_back(index);
|
||||
index_mapping.parquet_indexes.emplace_back(index);
|
||||
}
|
||||
}
|
||||
|
||||
required_indices.emplace_back(index_mapping);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -43,6 +43,7 @@ private:
|
||||
|
||||
size_t countRows(size_t max_block_size) override;
|
||||
bool supportsCountRows() const override { return true; }
|
||||
bool supportsCustomSerializations() const override { return true; }
|
||||
|
||||
const String & columnName(size_t i) const;
|
||||
size_t columnIndex(StringRef name, size_t key_index);
|
||||
|
@ -6,6 +6,7 @@
|
||||
# include <Columns/ColumnMap.h>
|
||||
# include <Columns/ColumnNullable.h>
|
||||
# include <Columns/ColumnString.h>
|
||||
# include <Columns/ColumnsCommon.h>
|
||||
# include <Columns/ColumnsDateTime.h>
|
||||
# include <Columns/ColumnsNumber.h>
|
||||
# include <DataTypes/DataTypeArray.h>
|
||||
@ -16,13 +17,13 @@
|
||||
# include <DataTypes/DataTypeIPv4andIPv6.h>
|
||||
# include <DataTypes/DataTypeLowCardinality.h>
|
||||
# include <DataTypes/DataTypeMap.h>
|
||||
# include <DataTypes/DataTypeNested.h>
|
||||
# include <DataTypes/DataTypeNullable.h>
|
||||
# include <DataTypes/DataTypeString.h>
|
||||
# include <DataTypes/DataTypeTuple.h>
|
||||
# include <DataTypes/DataTypesDecimal.h>
|
||||
# include <DataTypes/DataTypesNumber.h>
|
||||
# include <DataTypes/NestedUtils.h>
|
||||
# include <DataTypes/DataTypeNested.h>
|
||||
# include <Formats/FormatFactory.h>
|
||||
# include <Formats/SchemaInferenceUtils.h>
|
||||
# include <Formats/insertNullAsDefaultIfNeeded.h>
|
||||
@ -35,6 +36,8 @@
|
||||
# include <Common/FieldVisitorsAccurateComparison.h>
|
||||
# include "ArrowBufferedStreams.h"
|
||||
|
||||
# include <orc/Vector.hh>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -110,7 +113,21 @@ static const orc::Type * getORCTypeByName(const orc::Type & schema, const String
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_with_unsupported_types, bool & skipped)
|
||||
static bool isDictionaryEncoded(const orc::StripeInformation * stripe_info, const orc::Type * orc_type)
|
||||
{
|
||||
if (!stripe_info)
|
||||
return false;
|
||||
|
||||
auto encoding = stripe_info->getColumnEncoding(orc_type->getColumnId());
|
||||
return encoding == orc::ColumnEncodingKind_DICTIONARY || encoding == orc::ColumnEncodingKind_DICTIONARY_V2;
|
||||
}
|
||||
|
||||
static DataTypePtr parseORCType(
|
||||
const orc::Type * orc_type,
|
||||
bool skip_columns_with_unsupported_types,
|
||||
bool dictionary_as_low_cardinality,
|
||||
const orc::StripeInformation * stripe_info,
|
||||
bool & skipped)
|
||||
{
|
||||
assert(orc_type != nullptr);
|
||||
|
||||
@ -137,12 +154,22 @@ static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_wi
|
||||
return std::make_shared<DataTypeDateTime64>(9);
|
||||
case orc::TypeKind::TIMESTAMP_INSTANT:
|
||||
return std::make_shared<DataTypeDateTime64>(9, "UTC");
|
||||
case orc::TypeKind::CHAR:
|
||||
case orc::TypeKind::VARCHAR:
|
||||
case orc::TypeKind::BINARY:
|
||||
case orc::TypeKind::STRING:
|
||||
return std::make_shared<DataTypeString>();
|
||||
case orc::TypeKind::CHAR:
|
||||
return std::make_shared<DataTypeFixedString>(orc_type->getMaximumLength());
|
||||
case orc::TypeKind::STRING: {
|
||||
DataTypePtr type;
|
||||
if (orc_type->getKind() == orc::TypeKind::CHAR)
|
||||
type = std::make_shared<DataTypeFixedString>(orc_type->getMaximumLength());
|
||||
else
|
||||
type = std::make_shared<DataTypeString>();
|
||||
|
||||
/// Wrap type in LowCardinality if ORC column is dictionary encoded and dictionary_as_low_cardinality is true
|
||||
if (dictionary_as_low_cardinality && isDictionaryEncoded(stripe_info, orc_type))
|
||||
type = std::make_shared<DataTypeLowCardinality>(type);
|
||||
|
||||
return type;
|
||||
}
|
||||
case orc::TypeKind::DECIMAL: {
|
||||
UInt64 precision = orc_type->getPrecision();
|
||||
UInt64 scale = orc_type->getScale();
|
||||
@ -157,7 +184,8 @@ static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_wi
|
||||
if (subtype_count != 1)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Orc List type {}", orc_type->toString());
|
||||
|
||||
DataTypePtr nested_type = parseORCType(orc_type->getSubtype(0), skip_columns_with_unsupported_types, skipped);
|
||||
DataTypePtr nested_type = parseORCType(
|
||||
orc_type->getSubtype(0), skip_columns_with_unsupported_types, dictionary_as_low_cardinality, stripe_info, skipped);
|
||||
if (skipped)
|
||||
return {};
|
||||
|
||||
@ -167,11 +195,12 @@ static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_wi
|
||||
if (subtype_count != 2)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Orc Map type {}", orc_type->toString());
|
||||
|
||||
DataTypePtr key_type = parseORCType(orc_type->getSubtype(0), skip_columns_with_unsupported_types, skipped);
|
||||
DataTypePtr key_type = parseORCType(
|
||||
orc_type->getSubtype(0), skip_columns_with_unsupported_types, dictionary_as_low_cardinality, stripe_info, skipped);
|
||||
if (skipped)
|
||||
return {};
|
||||
|
||||
DataTypePtr value_type = parseORCType(orc_type->getSubtype(1), skip_columns_with_unsupported_types, skipped);
|
||||
DataTypePtr value_type = parseORCType(orc_type->getSubtype(1), skip_columns_with_unsupported_types, dictionary_as_low_cardinality, stripe_info, skipped);
|
||||
if (skipped)
|
||||
return {};
|
||||
|
||||
@ -185,7 +214,8 @@ static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_wi
|
||||
|
||||
for (size_t i = 0; i < orc_type->getSubtypeCount(); ++i)
|
||||
{
|
||||
auto parsed_type = parseORCType(orc_type->getSubtype(i), skip_columns_with_unsupported_types, skipped);
|
||||
auto parsed_type
|
||||
= parseORCType(orc_type->getSubtype(i), skip_columns_with_unsupported_types, dictionary_as_low_cardinality, stripe_info, skipped);
|
||||
if (skipped)
|
||||
return {};
|
||||
|
||||
@ -487,7 +517,7 @@ static void buildORCSearchArgumentImpl(
|
||||
/// For queries with where condition like "a > 10", if a column contains negative values such as "-1", pushing or not pushing
|
||||
/// down filters would result in different outputs.
|
||||
bool skipped = false;
|
||||
auto expect_type = makeNullableRecursively(parseORCType(orc_type, true, skipped));
|
||||
auto expect_type = makeNullableRecursively(parseORCType(orc_type, true, false, nullptr, skipped));
|
||||
const ColumnWithTypeAndName * column = header.findByName(column_name, format_settings.orc.case_insensitive_column_matching);
|
||||
if (!expect_type || !column)
|
||||
{
|
||||
@ -741,7 +771,7 @@ static const orc::Type * traverseDownORCTypeByName(
|
||||
if (orc::LIST == orc_type->getKind())
|
||||
{
|
||||
/// For cases in which header contains subcolumns flattened from nested columns.
|
||||
/// For example, "a Nested(x String, y Int64)" is flattened to "a.x Array(String), a.y Array(Int64)", and orc file schema is still "a array<struct<x string, y long>>".
|
||||
/// For example, "a Nested(x String, y Int64)" is flattened to "a.x Array(String), a.y Array(Int64)", and ORC file schema is still "a array<struct<x string, y long>>".
|
||||
/// In this case, we should skip possible array type and traverse down to its nested struct type.
|
||||
const auto * array_type = typeid_cast<const DataTypeArray *>(removeNullable(type).get());
|
||||
const auto * orc_nested_type = orc_type->getSubtype(0);
|
||||
@ -793,7 +823,7 @@ static void updateIncludeTypeIds(
|
||||
return;
|
||||
}
|
||||
case orc::STRUCT: {
|
||||
/// To make sure tuple field pruning work fine, we should include only the fields of orc struct type which are also contained in CH tuple types, instead of all fields of orc struct type.
|
||||
/// To make sure tuple field pruning work fine, we should include only the fields of ORC struct type which are also contained in CH tuple types, instead of all fields of ORC struct type.
|
||||
/// For example, if the CH tuple type in the header is "x Tuple(a String)" and the ORC struct type is "x struct<a:string, b:long>", then only the type id of field "x.a" should be included.
/// For tuple field pruning purposes, we should never include "x.b" since it is not required in the format header.
|
||||
const auto * tuple_type = typeid_cast<const DataTypeTuple *>(non_nullable_type.get());
|
||||
@ -860,11 +890,17 @@ void NativeORCBlockInputFormat::prepareFileReader()
|
||||
total_stripes = static_cast<int>(file_reader->getNumberOfStripes());
|
||||
current_stripe = -1;
|
||||
|
||||
|
||||
std::unique_ptr<orc::StripeInformation> stripe_info;
|
||||
if (file_reader->getNumberOfStripes())
|
||||
stripe_info = file_reader->getStripe(0);
|
||||
|
||||
orc_column_to_ch_column = std::make_unique<ORCColumnToCHColumn>(
|
||||
getPort().getHeader(),
|
||||
format_settings.orc.allow_missing_columns,
|
||||
format_settings.null_as_default,
|
||||
format_settings.orc.case_insensitive_column_matching);
|
||||
format_settings.orc.case_insensitive_column_matching,
|
||||
format_settings.orc.dictionary_as_low_cardinality);
|
||||
|
||||
const bool ignore_case = format_settings.orc.case_insensitive_column_matching;
|
||||
const auto & header = getPort().getHeader();
|
||||
@ -902,6 +938,7 @@ bool NativeORCBlockInputFormat::prepareStripeReader()
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "ORC stripe {} has no rows", current_stripe);
|
||||
|
||||
orc::RowReaderOptions row_reader_options;
|
||||
row_reader_options.setEnableLazyDecoding(format_settings.orc.dictionary_as_low_cardinality);
|
||||
row_reader_options.includeTypes(include_indices);
|
||||
row_reader_options.setTimezoneName(format_settings.orc.reader_time_zone_name);
|
||||
row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength());
|
||||
@ -992,15 +1029,25 @@ NamesAndTypesList NativeORCSchemaReader::readSchema()
|
||||
std::atomic<int> is_stopped = 0;
|
||||
getFileReader(in, file_reader, format_settings, is_stopped);
|
||||
|
||||
|
||||
const auto & schema = file_reader->getType();
|
||||
Block header;
|
||||
std::unique_ptr<orc::StripeInformation> stripe_info;
|
||||
if (file_reader->getNumberOfStripes())
|
||||
stripe_info = file_reader->getStripe(0);
|
||||
|
||||
for (size_t i = 0; i < schema.getSubtypeCount(); ++i)
|
||||
{
|
||||
const std::string & name = schema.getFieldName(i);
|
||||
const orc::Type * orc_type = schema.getSubtype(i);
|
||||
|
||||
bool skipped = false;
|
||||
DataTypePtr type = parseORCType(orc_type, format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference, skipped);
|
||||
DataTypePtr type = parseORCType(
|
||||
orc_type,
|
||||
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference,
|
||||
format_settings.orc.dictionary_as_low_cardinality,
|
||||
stripe_info.get(),
|
||||
skipped);
|
||||
if (!skipped)
|
||||
header.insert(ColumnWithTypeAndName{type, name});
|
||||
}
|
||||
@ -1011,11 +1058,16 @@ NamesAndTypesList NativeORCSchemaReader::readSchema()
|
||||
}
|
||||
|
||||
ORCColumnToCHColumn::ORCColumnToCHColumn(
|
||||
const Block & header_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_)
|
||||
const Block & header_,
|
||||
bool allow_missing_columns_,
|
||||
bool null_as_default_,
|
||||
bool case_insensitive_matching_,
|
||||
bool dictionary_as_low_cardinality_)
|
||||
: header(header_)
|
||||
, allow_missing_columns(allow_missing_columns_)
|
||||
, null_as_default(null_as_default_)
|
||||
, case_insensitive_matching(case_insensitive_matching_)
|
||||
, dictionary_as_low_cardinality(dictionary_as_low_cardinality_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -1129,6 +1181,120 @@ readColumnWithNumericDataCast(const orc::ColumnVectorBatch * orc_column, const o
|
||||
return {std::move(internal_column), std::move(internal_type), column_name};
|
||||
}
|
||||
|
||||
template <bool fixed_string>
|
||||
static ColumnWithTypeAndName readColumnWithEncodedStringOrFixedStringData(
|
||||
const orc::ColumnVectorBatch * orc_column, const orc::Type * orc_type, const String & column_name, bool nullable)
|
||||
{
|
||||
/// Fill CH holder_column with the ORC dictionary.
/// Note that holder_column is always a ColumnString or ColumnFixedString whether nullable is true or false, because the ORC dictionary doesn't contain null values.
|
||||
DataTypePtr holder_type;
|
||||
if constexpr (fixed_string)
|
||||
holder_type = std::make_shared<DataTypeFixedString>(orc_type->getMaximumLength());
|
||||
else
|
||||
holder_type = std::make_shared<DataTypeString>();
|
||||
|
||||
DataTypePtr nested_type = nullable ? std::make_shared<DataTypeNullable>(holder_type) : holder_type;
|
||||
auto internal_type = std::make_shared<DataTypeLowCardinality>(std::move(nested_type));
|
||||
|
||||
const auto & orc_str_column = dynamic_cast<const orc::EncodedStringVectorBatch &>(*orc_column);
|
||||
size_t rows = orc_str_column.numElements;
|
||||
const auto & orc_dict = *orc_str_column.dictionary;
|
||||
if (orc_dict.dictionaryOffset.size() <= 1)
|
||||
return {internal_type->createColumn(), internal_type, column_name};
|
||||
|
||||
size_t dict_size = orc_dict.dictionaryOffset.size() - 1;
|
||||
auto holder_column = holder_type->createColumn();
|
||||
if constexpr (fixed_string)
|
||||
{
|
||||
const size_t n = orc_type->getMaximumLength();
|
||||
auto & concrete_holder_column = assert_cast<ColumnFixedString &>(*holder_column);
|
||||
PaddedPODArray<UInt8> & column_chars_t = concrete_holder_column.getChars();
|
||||
size_t reserve_size = dict_size * n;
|
||||
column_chars_t.resize_exact(reserve_size);
|
||||
size_t curr_offset = 0;
|
||||
for (size_t i = 0; i < dict_size; ++i)
|
||||
{
|
||||
const auto * buf = orc_dict.dictionaryBlob.data() + orc_dict.dictionaryOffset[i];
|
||||
size_t buf_size = orc_dict.dictionaryOffset[i + 1] - orc_dict.dictionaryOffset[i];
|
||||
memcpy(&column_chars_t[curr_offset], buf, buf_size);
|
||||
curr_offset += n;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & concrete_holder_column = assert_cast<ColumnString &>(*holder_column);
|
||||
PaddedPODArray<UInt8> & column_chars_t = concrete_holder_column.getChars();
|
||||
PaddedPODArray<UInt64> & column_offsets = concrete_holder_column.getOffsets();
|
||||
|
||||
size_t reserve_size = orc_dict.dictionaryBlob.size() + dict_size;
|
||||
column_chars_t.resize_exact(reserve_size);
|
||||
column_offsets.resize_exact(dict_size);
|
||||
size_t curr_offset = 0;
|
||||
for (size_t i = 0; i < dict_size; ++i)
|
||||
{
|
||||
const auto * buf = orc_dict.dictionaryBlob.data() + orc_dict.dictionaryOffset[i];
|
||||
size_t buf_size = orc_dict.dictionaryOffset[i + 1] - orc_dict.dictionaryOffset[i];
|
||||
memcpy(&column_chars_t[curr_offset], buf, buf_size);
|
||||
curr_offset += buf_size;
|
||||
|
||||
column_chars_t[curr_offset] = 0;
|
||||
++curr_offset;
|
||||
|
||||
column_offsets[i] = curr_offset;
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert CH dictionary_column from holder_column
|
||||
auto tmp_internal_column = internal_type->createColumn();
|
||||
auto dictionary_column = IColumn::mutate(assert_cast<ColumnLowCardinality *>(tmp_internal_column.get())->getDictionaryPtr());
|
||||
auto index_column
|
||||
= dynamic_cast<IColumnUnique *>(dictionary_column.get())->uniqueInsertRangeFrom(*holder_column, 0, holder_column->size());
|
||||
|
||||
/// Fill index_column and wrap it with LowCardinality
|
||||
auto call_by_type = [&](auto index_type) -> MutableColumnPtr
|
||||
{
|
||||
using IndexType = decltype(index_type);
|
||||
const ColumnVector<IndexType> * concrete_index_column = checkAndGetColumn<ColumnVector<IndexType>>(index_column.get());
|
||||
if (!concrete_index_column)
|
||||
return nullptr;
|
||||
|
||||
const auto & index_data = concrete_index_column->getData();
|
||||
auto new_index_column = ColumnVector<IndexType>::create(rows);
|
||||
auto & new_index_data = dynamic_cast<ColumnVector<IndexType> &>(*new_index_column).getData();
|
||||
|
||||
if (!orc_str_column.hasNulls)
|
||||
{
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
{
|
||||
/// First map row index to ORC dictionary index, then map ORC dictionary index to CH dictionary index
|
||||
new_index_data[i] = index_data[orc_str_column.index[i]];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
{
|
||||
/// Set index 0 if we meet a null value. If dictionary_column is nullable, 0 represents the null value.
/// Otherwise 0 represents the default string value, which is reasonable because null values are converted to default values when casting a nullable column to a non-nullable one.
|
||||
new_index_data[i] = orc_str_column.notNull[i] ? index_data[orc_str_column.index[i]] : 0;
|
||||
}
|
||||
}
|
||||
|
||||
return ColumnLowCardinality::create(std::move(dictionary_column), std::move(new_index_column));
|
||||
};
|
||||
|
||||
MutableColumnPtr internal_column;
|
||||
if (!internal_column)
|
||||
internal_column = call_by_type(UInt8());
|
||||
if (!internal_column)
|
||||
internal_column = call_by_type(UInt16());
|
||||
if (!internal_column)
|
||||
internal_column = call_by_type(UInt32());
|
||||
if (!internal_column)
|
||||
internal_column = call_by_type(UInt64());
|
||||
return {std::move(internal_column), std::move(internal_type), column_name};
|
||||
}
|
||||
|
||||
static ColumnWithTypeAndName
|
||||
readColumnWithStringData(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name)
|
||||
{
|
||||
@ -1372,16 +1538,16 @@ readColumnWithTimestampData(const orc::ColumnVectorBatch * orc_column, const orc
|
||||
return {std::move(internal_column), std::move(internal_type), column_name};
|
||||
}
|
||||
|
||||
static ColumnWithTypeAndName readColumnFromORCColumn(
|
||||
ColumnWithTypeAndName ORCColumnToCHColumn::readColumnFromORCColumn(
|
||||
const orc::ColumnVectorBatch * orc_column,
|
||||
const orc::Type * orc_type,
|
||||
const std::string & column_name,
|
||||
bool inside_nullable,
|
||||
DataTypePtr type_hint = nullptr)
|
||||
DataTypePtr type_hint) const
|
||||
{
|
||||
bool skipped = false;
|
||||
|
||||
if (!inside_nullable && (orc_column->hasNulls || (type_hint && type_hint->isNullable()))
|
||||
if (!inside_nullable && (orc_column->hasNulls || (type_hint && type_hint->isNullable())) && !orc_column->isEncoded
|
||||
&& (orc_type->getKind() != orc::LIST && orc_type->getKind() != orc::MAP && orc_type->getKind() != orc::STRUCT))
|
||||
{
|
||||
DataTypePtr nested_type_hint;
|
||||
@ -1423,7 +1589,14 @@ static ColumnWithTypeAndName readColumnFromORCColumn(
|
||||
default:;
|
||||
}
|
||||
}
|
||||
return readColumnWithStringData(orc_column, orc_type, column_name);
|
||||
|
||||
if (orc_column->isEncoded && dictionary_as_low_cardinality)
|
||||
{
|
||||
bool nullable = type_hint ? isNullableOrLowCardinalityNullable(type_hint) : true;
|
||||
return readColumnWithEncodedStringOrFixedStringData<false>(orc_column, orc_type, column_name, nullable);
|
||||
}
|
||||
else
|
||||
return readColumnWithStringData(orc_column, orc_type, column_name);
|
||||
}
|
||||
case orc::CHAR: {
|
||||
if (type_hint)
|
||||
@ -1441,7 +1614,14 @@ static ColumnWithTypeAndName readColumnFromORCColumn(
|
||||
default:;
|
||||
}
|
||||
}
|
||||
return readColumnWithFixedStringData(orc_column, orc_type, column_name);
|
||||
|
||||
if (orc_column->isEncoded && dictionary_as_low_cardinality)
|
||||
{
|
||||
bool nullable = type_hint ? isNullableOrLowCardinalityNullable(type_hint) : true;
|
||||
return readColumnWithEncodedStringOrFixedStringData<true>(orc_column, orc_type, column_name, nullable);
|
||||
}
|
||||
else
|
||||
return readColumnWithFixedStringData(orc_column, orc_type, column_name);
|
||||
}
|
||||
case orc::BOOLEAN:
|
||||
return readColumnWithBooleanData(orc_column, orc_type, column_name);
|
||||
@ -1468,7 +1648,7 @@ static ColumnWithTypeAndName readColumnFromORCColumn(
|
||||
case orc::TIMESTAMP_INSTANT:
|
||||
return readColumnWithTimestampData(orc_column, orc_type, column_name);
|
||||
case orc::DECIMAL: {
|
||||
auto interal_type = parseORCType(orc_type, false, skipped);
|
||||
auto interal_type = parseORCType(orc_type, false, false, nullptr, skipped);
|
||||
|
||||
auto precision = orc_type->getPrecision();
|
||||
if (precision == 0)
|
||||
|
@ -111,7 +111,12 @@ public:
|
||||
using ORCColumnWithType = std::pair<ORCColumnPtr, ORCTypePtr>;
|
||||
using NameToColumnPtr = std::unordered_map<std::string, ORCColumnWithType>;
|
||||
|
||||
ORCColumnToCHColumn(const Block & header_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_ = false);
|
||||
ORCColumnToCHColumn(
|
||||
const Block & header_,
|
||||
bool allow_missing_columns_,
|
||||
bool null_as_default_,
|
||||
bool case_insensitive_matching_ = false,
|
||||
bool dictionary_as_low_cardinality_ = false);
|
||||
|
||||
void orcTableToCHChunk(
|
||||
Chunk & res,
|
||||
@ -124,11 +129,19 @@ public:
|
||||
Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows, BlockMissingValues * block_missing_values = nullptr);
|
||||
|
||||
private:
|
||||
ColumnWithTypeAndName readColumnFromORCColumn(
|
||||
const orc::ColumnVectorBatch * orc_column,
|
||||
const orc::Type * orc_type,
|
||||
const std::string & column_name,
|
||||
bool inside_nullable,
|
||||
DataTypePtr type_hint = nullptr) const;
|
||||
|
||||
const Block & header;
|
||||
/// If false, throw exception if some columns in header not exists in arrow table.
|
||||
bool allow_missing_columns;
|
||||
bool null_as_default;
|
||||
bool case_insensitive_matching;
|
||||
bool dictionary_as_low_cardinality;
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
@ -0,0 +1,525 @@
|
||||
#include <Processors/Formats/Impl/Parquet/ParquetBloomFilterCondition.h>
|
||||
#include <iostream>
|
||||
|
||||
#if USE_PARQUET
|
||||
|
||||
#include <parquet/bloom_filter.h>
|
||||
#include <parquet/xxhasher.h>
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
bool isParquetStringTypeSupportedForBloomFilters(
|
||||
const std::shared_ptr<const parquet::LogicalType> & logical_type,
|
||||
parquet::ConvertedType::type converted_type)
|
||||
{
|
||||
if (logical_type &&
|
||||
!logical_type->is_none()
|
||||
&& !(logical_type->is_string() || logical_type->is_BSON() || logical_type->is_JSON()))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (parquet::ConvertedType::type::NONE != converted_type &&
|
||||
!(converted_type == parquet::ConvertedType::JSON || converted_type == parquet::ConvertedType::UTF8
|
||||
|| converted_type == parquet::ConvertedType::BSON))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isParquetIntegerTypeSupportedForBloomFilters(const std::shared_ptr<const parquet::LogicalType> & logical_type, parquet::ConvertedType::type converted_type)
|
||||
{
|
||||
if (logical_type && !logical_type->is_none() && !logical_type->is_int())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (parquet::ConvertedType::type::NONE != converted_type && !(converted_type == parquet::ConvertedType::INT_8 || converted_type == parquet::ConvertedType::INT_16
|
||||
|| converted_type == parquet::ConvertedType::INT_32 || converted_type == parquet::ConvertedType::INT_64
|
||||
|| converted_type == parquet::ConvertedType::UINT_8 || converted_type == parquet::ConvertedType::UINT_16
|
||||
|| converted_type == parquet::ConvertedType::UINT_32 || converted_type == parquet::ConvertedType::UINT_64))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
uint64_t hashSpecialFLBATypes(const Field & field)
|
||||
{
|
||||
const T & value = field.safeGet<T>();
|
||||
|
||||
parquet::FLBA flba(reinterpret_cast<const uint8_t*>(&value));
|
||||
|
||||
parquet::XxHasher hasher;
|
||||
|
||||
return hasher.Hash(&flba, sizeof(T));
|
||||
};
|
||||
|
||||
std::optional<uint64_t> tryHashStringWithoutCompatibilityCheck(const Field & field)
|
||||
{
|
||||
const auto field_type = field.getType();
|
||||
|
||||
if (field_type != Field::Types::Which::String)
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
parquet::XxHasher hasher;
|
||||
parquet::ByteArray ba { field.safeGet<std::string>() };
|
||||
|
||||
return hasher.Hash(&ba);
|
||||
}
|
||||
|
||||
std::optional<uint64_t> tryHashString(
|
||||
const Field & field,
|
||||
const std::shared_ptr<const parquet::LogicalType> & logical_type,
|
||||
parquet::ConvertedType::type converted_type)
|
||||
{
|
||||
if (!isParquetStringTypeSupportedForBloomFilters(logical_type, converted_type))
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
return tryHashStringWithoutCompatibilityCheck(field);
|
||||
}
|
||||
|
||||
std::optional<uint64_t> tryHashFLBA(
|
||||
const Field & field,
|
||||
const std::shared_ptr<const parquet::LogicalType> & logical_type,
|
||||
parquet::ConvertedType::type converted_type,
|
||||
std::size_t parquet_column_length)
|
||||
{
|
||||
if (!isParquetStringTypeSupportedForBloomFilters(logical_type, converted_type))
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const auto field_type = field.getType();
|
||||
|
||||
if (field_type == Field::Types::Which::IPv6 && parquet_column_length == sizeof(IPv6))
|
||||
{
|
||||
return hashSpecialFLBATypes<IPv6>(field);
|
||||
}
|
||||
|
||||
return tryHashStringWithoutCompatibilityCheck(field);
|
||||
}
|
||||
|
||||
template <typename ParquetPhysicalType>
|
||||
std::optional<uint64_t> tryHashInt(const Field & field, const std::shared_ptr<const parquet::LogicalType> & logical_type, parquet::ConvertedType::type converted_type)
|
||||
{
|
||||
if (!isParquetIntegerTypeSupportedForBloomFilters(logical_type, converted_type))
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
parquet::XxHasher hasher;
|
||||
|
||||
if (field.getType() == Field::Types::Which::Int64)
|
||||
{
|
||||
return hasher.Hash(static_cast<ParquetPhysicalType>(field.safeGet<int64_t>()));
|
||||
}
|
||||
else if (field.getType() == Field::Types::Which::UInt64)
|
||||
{
|
||||
return hasher.Hash(static_cast<ParquetPhysicalType>(field.safeGet<uint64_t>()));
|
||||
}
|
||||
else if (field.getType() == Field::Types::IPv4)
|
||||
{
|
||||
/*
* In theory, we could accept IPv4 over 64-bit variables. It would only be a problem if it was hashed using the byte-array API
* with a zeroed buffer that had a 32-bit variable copied into it.
*
* To be on the safe side, accept it only when the physical type is 32 bits.
* */
|
||||
if constexpr (std::is_same_v<int32_t, ParquetPhysicalType>)
|
||||
{
|
||||
return hasher.Hash(static_cast<ParquetPhysicalType>(field.safeGet<IPv4>()));
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::optional<uint64_t> tryHash(const Field & field, const parquet::ColumnDescriptor * parquet_column_descriptor)
|
||||
{
|
||||
const auto physical_type = parquet_column_descriptor->physical_type();
|
||||
const auto & logical_type = parquet_column_descriptor->logical_type();
|
||||
const auto converted_type = parquet_column_descriptor->converted_type();
|
||||
|
||||
switch (physical_type)
|
||||
{
|
||||
case parquet::Type::type::INT32:
|
||||
return tryHashInt<int32_t>(field, logical_type, converted_type);
|
||||
case parquet::Type::type::INT64:
|
||||
return tryHashInt<int64_t>(field, logical_type, converted_type);
|
||||
case parquet::Type::type::BYTE_ARRAY:
|
||||
return tryHashString(field, logical_type, converted_type);
|
||||
case parquet::Type::type::FIXED_LEN_BYTE_ARRAY:
|
||||
return tryHashFLBA(field, logical_type, converted_type, parquet_column_descriptor->type_length());
|
||||
default:
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<std::vector<uint64_t>> hash(const IColumn * data_column, const parquet::ColumnDescriptor * parquet_column_descriptor)
|
||||
{
|
||||
std::vector<uint64_t> hashes;
|
||||
|
||||
for (size_t i = 0u; i < data_column->size(); i++)
|
||||
{
|
||||
Field f;
|
||||
data_column->get(i, f);
|
||||
|
||||
auto hashed_value = tryHash(f, parquet_column_descriptor);
|
||||
|
||||
if (!hashed_value)
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
hashes.emplace_back(*hashed_value);
|
||||
}
|
||||
|
||||
return hashes;
|
||||
}
|
||||
|
||||
bool maybeTrueOnBloomFilter(const std::vector<uint64_t> & hashes, const std::unique_ptr<parquet::BloomFilter> & bloom_filter)
|
||||
{
|
||||
for (const auto hash : hashes)
|
||||
{
|
||||
if (bloom_filter->FindHash(hash))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
const parquet::ColumnDescriptor * getColumnDescriptorIfBloomFilterIsPresent(
|
||||
const std::unique_ptr<parquet::RowGroupMetaData> & parquet_rg_metadata,
|
||||
const std::vector<ArrowFieldIndexUtil::ClickHouseIndexToParquetIndex> & clickhouse_column_index_to_parquet_index,
|
||||
std::size_t clickhouse_column_index)
|
||||
{
|
||||
if (clickhouse_column_index_to_parquet_index.size() <= clickhouse_column_index)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const auto & parquet_indexes = clickhouse_column_index_to_parquet_index[clickhouse_column_index].parquet_indexes;
|
||||
|
||||
// complex types like structs, tuples and maps will have more than one index.
|
||||
// we don't support those for now
|
||||
if (parquet_indexes.size() > 1)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (parquet_indexes.empty())
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Something bad happened, raise an issue and try the query with `input_format_parquet_bloom_filter_push_down=false`");
|
||||
}
|
||||
|
||||
auto parquet_column_index = parquet_indexes[0];
|
||||
|
||||
const auto * parquet_column_descriptor = parquet_rg_metadata->schema()->Column(parquet_column_index);
|
||||
|
||||
bool column_has_bloom_filter = parquet_rg_metadata->ColumnChunk(parquet_column_index)->bloom_filter_offset().has_value();
|
||||
if (!column_has_bloom_filter)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return parquet_column_descriptor;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ParquetBloomFilterCondition::ParquetBloomFilterCondition(const std::vector<ConditionElement> & condition_, const Block & header_)
|
||||
: condition(condition_), header(header_)
|
||||
{
|
||||
}
|
||||
|
||||
bool ParquetBloomFilterCondition::mayBeTrueOnRowGroup(const ColumnIndexToBF & column_index_to_column_bf) const
|
||||
{
|
||||
using Function = ConditionElement::Function;
|
||||
std::vector<BoolMask> rpn_stack;
|
||||
|
||||
for (const auto & element : condition)
|
||||
{
|
||||
if (element.function == Function::FUNCTION_IN
|
||||
|| element.function == Function::FUNCTION_NOT_IN)
|
||||
{
|
||||
bool maybe_true = true;
|
||||
for (auto column_index = 0u; column_index < element.hashes_per_column.size(); column_index++)
|
||||
{
|
||||
// in case bloom filter is not present for this row group
|
||||
// https://github.com/ClickHouse/ClickHouse/pull/62966#discussion_r1722361237
|
||||
if (!column_index_to_column_bf.contains(element.key_columns[column_index]))
|
||||
{
|
||||
rpn_stack.emplace_back(true, true);
|
||||
continue;
|
||||
}
|
||||
|
||||
bool column_maybe_contains = maybeTrueOnBloomFilter(
|
||||
element.hashes_per_column[column_index],
|
||||
column_index_to_column_bf.at(element.key_columns[column_index]));
|
||||
|
||||
if (!column_maybe_contains)
|
||||
{
|
||||
maybe_true = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
rpn_stack.emplace_back(maybe_true, true);
|
||||
if (element.function == Function::FUNCTION_NOT_IN)
|
||||
rpn_stack.back() = !rpn_stack.back();
|
||||
}
|
||||
else if (element.function == Function::FUNCTION_NOT)
|
||||
{
|
||||
rpn_stack.back() = !rpn_stack.back();
|
||||
}
|
||||
else if (element.function == Function::FUNCTION_OR)
|
||||
{
|
||||
auto arg1 = rpn_stack.back();
|
||||
rpn_stack.pop_back();
|
||||
auto arg2 = rpn_stack.back();
|
||||
rpn_stack.back() = arg1 | arg2;
|
||||
}
|
||||
else if (element.function == Function::FUNCTION_AND)
|
||||
{
|
||||
auto arg1 = rpn_stack.back();
|
||||
rpn_stack.pop_back();
|
||||
auto arg2 = rpn_stack.back();
|
||||
rpn_stack.back() = arg1 & arg2;
|
||||
}
|
||||
else if (element.function == Function::ALWAYS_TRUE)
|
||||
{
|
||||
rpn_stack.emplace_back(true, false);
|
||||
}
|
||||
else if (element.function == Function::ALWAYS_FALSE)
|
||||
{
|
||||
rpn_stack.emplace_back(false, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
rpn_stack.emplace_back(true, true);
|
||||
}
|
||||
}
|
||||
|
||||
if (rpn_stack.size() != 1)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::mayBeTrueOnRowGroup");
|
||||
|
||||
return rpn_stack[0].can_be_true;
|
||||
}
|
||||
|
||||
std::unordered_set<std::size_t> ParquetBloomFilterCondition::getFilteringColumnKeys() const
|
||||
{
|
||||
std::unordered_set<std::size_t> column_keys;
|
||||
|
||||
for (const auto & element : condition)
|
||||
{
|
||||
for (const auto index : element.key_columns)
|
||||
{
|
||||
column_keys.insert(index);
|
||||
}
|
||||
}
|
||||
|
||||
return column_keys;
|
||||
}
|
||||
|
||||
/*
|
||||
* `KeyCondition::rpn` is overly complex for bloom filters, and some operations are not even supported. Moreover, to avoid hashing each time
* we loop over an RPN element, we need to store hashes instead of the WHERE predicate values. To address this, we loop over `KeyCondition::rpn`
* and build a simplified RPN that holds hashes instead of values.
|
||||
*
|
||||
* `KeyCondition::RPNElement::FUNCTION_IN_RANGE` becomes:
|
||||
* `FUNCTION_IN`
|
||||
* `FUNCTION_UNKNOWN` when range limits are different
|
||||
* `KeyCondition::RPNElement::FUNCTION_IN_SET` becomes
|
||||
* `FUNCTION_IN`
|
||||
*
|
||||
* Complex types and structs are not supported.
|
||||
* There are two sources of data types being analyzed, and they need to be compatible: DB::Field type and parquet type.
|
||||
* This is determined by the `isColumnSupported` method.
|
||||
*
|
||||
* Some interesting examples:
|
||||
* 1. file(..., 'str_column UInt64') where str_column = 50; Field.type == UInt64. Parquet type string. Not supported.
|
||||
* 2. file(...) where str_column = 50; Field.type == String (conversion already taken care of by `KeyCondition`). Parquet type string.
|
||||
* 3. file(...) where uint32_column = toIPv4(5). Field.type == IPv4. Incompatible column types, resolved by `KeyCondition` itself.
|
||||
* 4. file(...) where toIPv4(uint32_column) = toIPv4(5). Field.type == IPv4. We know it is safe to hash it using an int32 API.
|
||||
* */
|
||||
std::vector<ParquetBloomFilterCondition::ConditionElement> keyConditionRPNToParquetBloomFilterCondition(
|
||||
const std::vector<KeyCondition::RPNElement> & rpn,
|
||||
const std::vector<ArrowFieldIndexUtil::ClickHouseIndexToParquetIndex> & clickhouse_column_index_to_parquet_index,
|
||||
const std::unique_ptr<parquet::RowGroupMetaData> & parquet_rg_metadata)
|
||||
{
|
||||
std::vector<ParquetBloomFilterCondition::ConditionElement> condition_elements;
|
||||
|
||||
using RPNElement = KeyCondition::RPNElement;
|
||||
using Function = ParquetBloomFilterCondition::ConditionElement::Function;
|
||||
|
||||
for (const auto & rpn_element : rpn)
|
||||
{
|
||||
// This would be a problem for `where negate(x) = -58`:
// it would perform a bloom filter search on `-58`, and possibly miss row groups containing this data.
|
||||
if (!rpn_element.monotonic_functions_chain.empty())
|
||||
{
|
||||
condition_elements.emplace_back(Function::FUNCTION_UNKNOWN);
|
||||
continue;
|
||||
}
|
||||
|
||||
ParquetBloomFilterCondition::ConditionElement::HashesForColumns hashes;
|
||||
|
||||
if (rpn_element.function == RPNElement::FUNCTION_IN_RANGE
|
||||
|| rpn_element.function == RPNElement::FUNCTION_NOT_IN_RANGE)
|
||||
{
|
||||
// Only FUNCTION_EQUALS is supported, and for that the range extremes need to be the same
|
||||
if (rpn_element.range.left != rpn_element.range.right)
|
||||
{
|
||||
condition_elements.emplace_back(Function::FUNCTION_UNKNOWN);
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto * parquet_column_descriptor =
|
||||
getColumnDescriptorIfBloomFilterIsPresent(parquet_rg_metadata, clickhouse_column_index_to_parquet_index, rpn_element.key_column);
|
||||
|
||||
if (!parquet_column_descriptor)
|
||||
{
|
||||
condition_elements.emplace_back(Function::FUNCTION_UNKNOWN);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto hashed_value = tryHash(rpn_element.range.left, parquet_column_descriptor);
|
||||
|
||||
if (!hashed_value)
|
||||
{
|
||||
condition_elements.emplace_back(Function::FUNCTION_UNKNOWN);
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<uint64_t> hashes_for_column;
|
||||
hashes_for_column.emplace_back(*hashed_value);
|
||||
|
||||
hashes.emplace_back(std::move(hashes_for_column));
|
||||
|
||||
auto function = rpn_element.function == RPNElement::FUNCTION_IN_RANGE
|
||||
? ParquetBloomFilterCondition::ConditionElement::Function::FUNCTION_IN
|
||||
: ParquetBloomFilterCondition::ConditionElement::Function::FUNCTION_NOT_IN;
|
||||
|
||||
std::vector<std::size_t> key_columns;
|
||||
key_columns.emplace_back(rpn_element.key_column);
|
||||
|
||||
condition_elements.emplace_back(function, std::move(hashes), std::move(key_columns));
|
||||
}
|
||||
else if (rpn_element.function == RPNElement::FUNCTION_IN_SET
|
||||
|| rpn_element.function == RPNElement::FUNCTION_NOT_IN_SET)
|
||||
{
|
||||
const auto & set_index = rpn_element.set_index;
|
||||
const auto & ordered_set = set_index->getOrderedSet();
|
||||
const auto & indexes_mapping = set_index->getIndexesMapping();
|
||||
bool found_empty_column = false;
|
||||
|
||||
std::vector<std::size_t> key_columns;
|
||||
|
||||
for (auto i = 0u; i < ordered_set.size(); i++)
|
||||
{
|
||||
const auto & set_column = ordered_set[i];
|
||||
|
||||
const auto * parquet_column_descriptor = getColumnDescriptorIfBloomFilterIsPresent(
|
||||
parquet_rg_metadata,
|
||||
clickhouse_column_index_to_parquet_index,
|
||||
indexes_mapping[i].key_index);
|
||||
|
||||
if (!parquet_column_descriptor)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
auto column = set_column;
|
||||
|
||||
if (column->empty())
|
||||
{
|
||||
found_empty_column = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (const auto & nullable_column = checkAndGetColumn<ColumnNullable>(set_column.get()))
|
||||
{
|
||||
column = nullable_column->getNestedColumnPtr();
|
||||
}
|
||||
|
||||
auto hashes_for_column_opt = hash(column.get(), parquet_column_descriptor);
|
||||
|
||||
if (!hashes_for_column_opt)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
auto & hashes_for_column = *hashes_for_column_opt;
|
||||
|
||||
if (hashes_for_column.empty())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
hashes.emplace_back(hashes_for_column);
|
||||
|
||||
key_columns.push_back(indexes_mapping[i].key_index);
|
||||
}
|
||||
|
||||
if (found_empty_column)
|
||||
{
|
||||
condition_elements.emplace_back(Function::ALWAYS_FALSE);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (hashes.empty())
|
||||
{
|
||||
condition_elements.emplace_back(Function::FUNCTION_UNKNOWN);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto function = RPNElement::FUNCTION_IN_SET == rpn_element.function ? Function::FUNCTION_IN : Function::FUNCTION_NOT_IN;
|
||||
|
||||
condition_elements.emplace_back(function, hashes, key_columns);
|
||||
}
|
||||
else if (rpn_element.function == RPNElement::FUNCTION_NOT)
|
||||
{
|
||||
condition_elements.emplace_back(Function::FUNCTION_NOT);
|
||||
}
|
||||
else if (rpn_element.function == RPNElement::FUNCTION_OR)
|
||||
{
|
||||
condition_elements.emplace_back(Function::FUNCTION_OR);
|
||||
}
|
||||
else if (rpn_element.function == RPNElement::FUNCTION_AND)
|
||||
{
|
||||
condition_elements.emplace_back(Function::FUNCTION_AND);
|
||||
}
|
||||
else
|
||||
{
|
||||
condition_elements.emplace_back(Function::ALWAYS_TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
return condition_elements;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
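A hedged usage sketch of how the pieces in this new file fit together on the reader side (the surrounding glue, e.g. how the key-condition RPN and the column-index mapping are obtained, is assumed and not part of this file; variable names are illustrative):

// Build the simplified bloom-filter RPN once per query.
std::vector<ParquetBloomFilterCondition::ConditionElement> elements
    = keyConditionRPNToParquetBloomFilterCondition(key_condition_rpn, index_mapping, row_group_metadata);
ParquetBloomFilterCondition bf_condition(elements, header);

// Then, per row group, load bloom filters only for the filtering columns
// (bf_condition.getFilteringColumnKeys()) and skip row groups that cannot match.
if (!bf_condition.mayBeTrueOnRowGroup(column_index_to_bloom_filter))
    return; // skip this row group entirely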
Some files were not shown because too many files have changed in this diff.