Merge remote-tracking branch 'origin/master' into pr-custom-key-failover

This commit is contained in:
Igor Nikonov 2023-11-28 18:07:04 +00:00
commit a06a71834a
261 changed files with 4678 additions and 1497 deletions

View File

@ -4,8 +4,8 @@ if (FUZZER)
# NOTE: Eldar Zaitov decided to name it "libfuzzer" instead of "fuzzer" to keep in mind other possible fuzzer backends.
# NOTE: no-link means that all the targets are built with instrumentation for fuzzer, but only some of them
# (tests) have an entry point for the fuzzer, and it's not checked.
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1")
# NOTE: oss-fuzz can change LIB_FUZZING_ENGINE variable
if (NOT LIB_FUZZING_ENGINE)

View File

@ -385,9 +385,25 @@ endif ()
include("${ClickHouse_SOURCE_DIR}/contrib/google-protobuf-cmake/protobuf_generate.cmake")
# These files need to be installed so that users can use the well-known protobuf types
set(google_proto_files
${protobuf_source_dir}/src/google/protobuf/any.proto
${protobuf_source_dir}/src/google/protobuf/api.proto
${protobuf_source_dir}/src/google/protobuf/descriptor.proto
${protobuf_source_dir}/src/google/protobuf/duration.proto
${protobuf_source_dir}/src/google/protobuf/empty.proto
${protobuf_source_dir}/src/google/protobuf/field_mask.proto
${protobuf_source_dir}/src/google/protobuf/source_context.proto
${protobuf_source_dir}/src/google/protobuf/struct.proto
${protobuf_source_dir}/src/google/protobuf/timestamp.proto
${protobuf_source_dir}/src/google/protobuf/type.proto
${protobuf_source_dir}/src/google/protobuf/wrappers.proto
)
add_library(_protobuf INTERFACE)
target_link_libraries(_protobuf INTERFACE _libprotobuf)
target_include_directories(_protobuf INTERFACE "${Protobuf_INCLUDE_DIR}")
set_target_properties(_protobuf PROPERTIES google_proto_files "${google_proto_files}")
add_library(ch_contrib::protobuf ALIAS _protobuf)
add_library(_protoc INTERFACE)

contrib/libunwind vendored

@ -1 +1 @@
Subproject commit 30cc1d3fd3655a5cfa0ab112fe320fb9fc0a8344
Subproject commit 40d8eadf96b127d9b22d53ce7a4fc52aaedea965

View File

@ -20,15 +20,7 @@ set(LIBUNWIND_ASM_SOURCES
"${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersRestore.S"
"${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersSave.S")
# CMake doesn't pass the correct architecture for Apple prior to CMake 3.19 [1]
# Workaround these two issues by compiling as C.
#
# [1]: https://gitlab.kitware.com/cmake/cmake/-/issues/20771
if (APPLE AND CMAKE_VERSION VERSION_LESS 3.19)
set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C)
else()
enable_language(ASM)
endif()
enable_language(ASM)
set(LIBUNWIND_SOURCES
${LIBUNWIND_CXX_SOURCES}

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.10.4.25"
ARG VERSION="23.10.5.20"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -1,8 +0,0 @@
# post / preinstall scripts (not needed, we do it in Dockerfile)
alpine-root/install/*
# docs (looks useless)
alpine-root/usr/share/doc/*
# packages, etc. (used by alpine-build.sh)
tgz-packages/*

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.10.4.25"
ARG VERSION="23.10.5.20"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.10.4.25"
ARG VERSION="23.10.5.20"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -126,6 +126,9 @@ function setup_logs_replication
# It doesn't make sense to try creating tables if SYNC fails
echo "SYSTEM SYNC DATABASE REPLICA default" | clickhouse-client "${CONNECTION_ARGS[@]}" || return 0
debug_or_sanitizer_build=$(clickhouse-client -q "WITH ((SELECT value FROM system.build_options WHERE name='BUILD_TYPE') AS build, (SELECT value FROM system.build_options WHERE name='CXX_FLAGS') as flags) SELECT build='Debug' OR flags LIKE '%fsanitize%'")
echo "Build is debug or sanitizer: $debug_or_sanitizer_build"
# For each system log table:
echo 'Create %_log tables'
clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
@ -133,7 +136,14 @@ function setup_logs_replication
if [[ "$table" = "trace_log" ]]
then
EXTRA_COLUMNS_FOR_TABLE="${EXTRA_COLUMNS_TRACE_LOG}"
EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION_TRACE_LOG}"
# Do not try to resolve stack traces in case of debug/sanitizers
# build, since it is too slow (flushing of trace_log can take ~1min
# with such MV attached)
if [[ "$debug_or_sanitizer_build" = 1 ]]; then
EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION}"
else
EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION_TRACE_LOG}"
fi
else
EXTRA_COLUMNS_FOR_TABLE="${EXTRA_COLUMNS}"
EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION}"
@ -182,3 +192,13 @@ function setup_logs_replication
" || continue
done
)
function stop_logs_replication
{
echo "Detach all logs replication"
clickhouse-client --query "select database||'.'||table from system.tables where database = 'system' and (table like '%_sender' or table like '%_watcher')" | {
tee /dev/stderr
} | {
xargs -n1 -r -i clickhouse-client --query "drop table {}"
}
}

View File

@ -3,6 +3,7 @@
from argparse import ArgumentParser
import os
import jinja2
import itertools
def removesuffix(text, suffix):
@ -47,6 +48,7 @@ def main(args):
loader=jinja2.FileSystemLoader(suite_dir),
keep_trailing_newline=True,
)
j2env.globals.update(product=itertools.product)
test_names = os.listdir(suite_dir)
for test_name in test_names:

View File

@ -212,11 +212,11 @@ quit
gdb -batch -command script.gdb -p $server_pid &
sleep 5
# gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
# gdb will send SIGSTOP, spend some time loading debug info, and then send SIGCONT, wait for it (up to send_timeout, 300s)
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
# Check connectivity after we attach gdb, because it might cause the server
# to freeze and the fuzzer will fail. In debug build it can take a lot of time.
# to freeze, and the fuzzer will fail. In debug build, it can take a lot of time.
for _ in {1..180}
do
if clickhouse-client --query "select 1"
@ -226,14 +226,15 @@ quit
sleep 1
done
kill -0 $server_pid # This checks that it is our server that is started and not some other one
echo 'Server started and responded'
echo 'Server started and responded.'
setup_logs_replication
# SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric.
# SC2046: Quote this to prevent word splitting. Actually I need word splitting.
# SC2046: Quote this to prevent word splitting. Actually, I need word splitting.
# shellcheck disable=SC2012,SC2046
timeout -s TERM --preserve-status 30m clickhouse-client \
--max_memory_usage_in_client=1000000000 \
--receive_timeout=10 \
--receive_data_timeout_ms=10000 \
--stacktrace \
@ -253,10 +254,10 @@ quit
wait "$fuzzer_pid" || fuzzer_exit_code=$?
echo "Fuzzer exit code is $fuzzer_exit_code"
# If the server dies, most often the fuzzer returns code 210: connection
# If the server dies, most often the fuzzer returns Code 210: Connection
# refused, and sometimes also code 32: attempt to read after eof. For
# simplicity, check again whether the server is accepting connections, using
# clickhouse-client. We don't check for existence of server process, because
# simplicity, check again whether the server is accepting connections using
# clickhouse-client. We don't check for the existence of the server process, because
# the process is still present while the server is terminating and not
# accepting the connections anymore.

View File

@ -217,6 +217,9 @@ ls -la /
clickhouse-client -q "system flush logs" ||:
# stop logs replication to make it possible to dump logs tables via clickhouse-local
stop_logs_replication
# Stop server so we can safely read data with clickhouse-local.
# Why do we read data with clickhouse-local?
# Because it's the simplest way to read it when the server has crashed.

View File

@ -0,0 +1,28 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.10.5.20-stable (e84001e5c61) FIXME as compared to v23.10.4.25-stable (330fd687d41)
#### Improvement
* Backported in [#56924](https://github.com/ClickHouse/ClickHouse/issues/56924): There was a potential vulnerability in previous ClickHouse versions: if a user has connected and unsuccessfully tried to authenticate with the "interserver secret" method, the server didn't terminate the connection immediately but continued to receive and ignore the leftover packets from the client. While these packets are ignored, they are still parsed, and if they use a compression method with another known vulnerability, it will lead to exploitation of it without authentication. This issue was found with [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#56794](https://github.com/ClickHouse/ClickHouse/pull/56794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Build/Testing/Packaging Improvement
* Backported in [#57023](https://github.com/ClickHouse/ClickHouse/issues/57023): There was an attempt to have the proper listing in [#44311](https://github.com/ClickHouse/ClickHouse/issues/44311), but the fix itself was in the wrong place, so it's still broken. See an [example](https://github.com/ClickHouse/ClickHouse/actions/runs/6897342568/job/18781001022#step:8:25). [#56989](https://github.com/ClickHouse/ClickHouse/pull/56989) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix ON CLUSTER queries without database on initial node [#56484](https://github.com/ClickHouse/ClickHouse/pull/56484) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix buffer overflow in Gorilla codec [#57107](https://github.com/ClickHouse/ClickHouse/pull/57107) ([Nikolay Degterinsky](https://github.com/evillique)).
* Close interserver connection on any exception before authentication [#57142](https://github.com/ClickHouse/ClickHouse/pull/57142) ([Antonio Andelic](https://github.com/antonio2368)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix client suggestions for user without grants [#56234](https://github.com/ClickHouse/ClickHouse/pull/56234) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix pygithub [#56778](https://github.com/ClickHouse/ClickHouse/pull/56778) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Avoid dependencies with no fixed versions [#56914](https://github.com/ClickHouse/ClickHouse/pull/56914) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Tiny improvement security [#57171](https://github.com/ClickHouse/ClickHouse/pull/57171) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.3.18.15-lts (7228475d77a) FIXME as compared to v23.3.17.13-lts (e867d59020f)
#### Improvement
* Backported in [#56928](https://github.com/ClickHouse/ClickHouse/issues/56928): There was a potential vulnerability in previous ClickHouse versions: if a user has connected and unsuccessfully tried to authenticate with the "interserver secret" method, the server didn't terminate the connection immediately but continued to receive and ignore the leftover packets from the client. While these packets are ignored, they are still parsed, and if they use a compression method with another known vulnerability, it will lead to exploitation of it without authentication. This issue was found with [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#56794](https://github.com/ClickHouse/ClickHouse/pull/56794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Build/Testing/Packaging Improvement
* Backported in [#57019](https://github.com/ClickHouse/ClickHouse/issues/57019): There was an attempt to have the proper listing in [#44311](https://github.com/ClickHouse/ClickHouse/issues/44311), but the fix itself was in the wrong place, so it's still broken. See an [example](https://github.com/ClickHouse/ClickHouse/actions/runs/6897342568/job/18781001022#step:8:25). [#56989](https://github.com/ClickHouse/ClickHouse/pull/56989) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix buffer overflow in Gorilla codec [#57107](https://github.com/ClickHouse/ClickHouse/pull/57107) ([Nikolay Degterinsky](https://github.com/evillique)).
* Close interserver connection on any exception before authentication [#57142](https://github.com/ClickHouse/ClickHouse/pull/57142) ([Antonio Andelic](https://github.com/antonio2368)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix pygithub [#56778](https://github.com/ClickHouse/ClickHouse/pull/56778) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Avoid dependencies with no fixed versions [#56914](https://github.com/ClickHouse/ClickHouse/pull/56914) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Tiny improvement security [#57171](https://github.com/ClickHouse/ClickHouse/pull/57171) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,28 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.8.8.20-lts (5e012a03bf2) FIXME as compared to v23.8.7.24-lts (812b95e14ba)
#### Improvement
* Backported in [#56509](https://github.com/ClickHouse/ClickHouse/issues/56509): Allow backup of materialized view with dropped inner table instead of failing the backup. [#56387](https://github.com/ClickHouse/ClickHouse/pull/56387) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#56929](https://github.com/ClickHouse/ClickHouse/issues/56929): There was a potential vulnerability in previous ClickHouse versions: if a user has connected and unsuccessfully tried to authenticate with the "interserver secret" method, the server didn't terminate the connection immediately but continued to receive and ignore the leftover packets from the client. While these packets are ignored, they are still parsed, and if they use a compression method with another known vulnerability, it will lead to exploitation of it without authentication. This issue was found with [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#56794](https://github.com/ClickHouse/ClickHouse/pull/56794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Build/Testing/Packaging Improvement
* Backported in [#57020](https://github.com/ClickHouse/ClickHouse/issues/57020): There was an attempt to have the proper listing in [#44311](https://github.com/ClickHouse/ClickHouse/issues/44311), but the fix itself was in the wrong place, so it's still broken. See an [example](https://github.com/ClickHouse/ClickHouse/actions/runs/6897342568/job/18781001022#step:8:25). [#56989](https://github.com/ClickHouse/ClickHouse/pull/56989) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix ON CLUSTER queries without database on initial node [#56484](https://github.com/ClickHouse/ClickHouse/pull/56484) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix buffer overflow in Gorilla codec [#57107](https://github.com/ClickHouse/ClickHouse/pull/57107) ([Nikolay Degterinsky](https://github.com/evillique)).
* Close interserver connection on any exception before authentication [#57142](https://github.com/ClickHouse/ClickHouse/pull/57142) ([Antonio Andelic](https://github.com/antonio2368)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix pygithub [#56778](https://github.com/ClickHouse/ClickHouse/pull/56778) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Avoid dependencies with no fixed versions [#56914](https://github.com/ClickHouse/ClickHouse/pull/56914) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Tiny improvement security [#57171](https://github.com/ClickHouse/ClickHouse/pull/57171) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,28 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.9.6.20-stable (cf7e84bb8cf) FIXME as compared to v23.9.5.29-stable (f8554c1a1ff)
#### Improvement
* Backported in [#56930](https://github.com/ClickHouse/ClickHouse/issues/56930): There was a potential vulnerability in previous ClickHouse versions: if a user has connected and unsuccessfully tried to authenticate with the "interserver secret" method, the server didn't terminate the connection immediately but continued to receive and ignore the leftover packets from the client. While these packets are ignored, they are still parsed, and if they use a compression method with another known vulnerability, it will lead to exploitation of it without authentication. This issue was found with [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#56794](https://github.com/ClickHouse/ClickHouse/pull/56794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Build/Testing/Packaging Improvement
* Backported in [#57022](https://github.com/ClickHouse/ClickHouse/issues/57022): There was an attempt to have the proper listing in [#44311](https://github.com/ClickHouse/ClickHouse/issues/44311), but the fix itself was in the wrong place, so it's still broken. See an [example](https://github.com/ClickHouse/ClickHouse/actions/runs/6897342568/job/18781001022#step:8:25). [#56989](https://github.com/ClickHouse/ClickHouse/pull/56989) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix ON CLUSTER queries without database on initial node [#56484](https://github.com/ClickHouse/ClickHouse/pull/56484) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix buffer overflow in Gorilla codec [#57107](https://github.com/ClickHouse/ClickHouse/pull/57107) ([Nikolay Degterinsky](https://github.com/evillique)).
* Close interserver connection on any exception before authentication [#57142](https://github.com/ClickHouse/ClickHouse/pull/57142) ([Antonio Andelic](https://github.com/antonio2368)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix client suggestions for user without grants [#56234](https://github.com/ClickHouse/ClickHouse/pull/56234) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix pygithub [#56778](https://github.com/ClickHouse/ClickHouse/pull/56778) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Avoid dependencies with no fixed versions [#56914](https://github.com/ClickHouse/ClickHouse/pull/56914) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Tiny improvement security [#57171](https://github.com/ClickHouse/ClickHouse/pull/57171) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -47,6 +47,12 @@ SELECT * FROM test_table;
└──────┴───────┘
```
## Virtual columns {#virtual-columns}
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
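For illustration only (not part of the original page), a minimal sketch of querying these virtual columns against the `test_table` shown above; the values returned depend on the actual blobs behind the table:

```sql
-- Inspect which blob each row came from and how large that blob is.
SELECT _path, _file, _size, count() AS rows
FROM test_table
GROUP BY _path, _file, _size;
```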
## See also
[Azure Blob Storage Table Function](/docs/en/sql-reference/table-functions/azureBlobStorage)

View File

@ -85,6 +85,10 @@ You can also change any [rocksdb options](https://github.com/facebook/rocksdb/wi
</rocksdb>
```
By default, the trivial approximate count optimization is turned off, which might affect the performance of `count()` queries. To enable this
optimization, set `optimize_trivial_approximate_count_query = 1`. This setting also affects `system.tables` for the EmbeddedRocksDB engine;
turn it on to see approximate values for `total_rows` and `total_bytes`.
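A minimal sketch of enabling the optimization for a single query (the table name `rocksdb_table` is hypothetical):

```sql
-- Allows count() to use RocksDB's estimated key count instead of a full scan.
SELECT count()
FROM rocksdb_table
SETTINGS optimize_trivial_approximate_count_query = 1;
```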
## Supported operations {#supported-operations}
### Inserts

View File

@ -230,8 +230,9 @@ libhdfs3 support HDFS namenode HA.
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}

View File

@ -142,8 +142,9 @@ Code: 48. DB::Exception: Received from localhost:9000. DB::Exception: Reading fr
## Virtual columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).

View File

@ -504,8 +504,8 @@ Indexes of type `set` can be utilized by all functions. The other index types ar
| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | inverted |
|------------------------------------------------------------------------------------------------------------|-------------|--------|------------|------------|--------------|----------|
| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, &lt;&gt;)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |
| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#equals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, &lt;&gt;)](/docs/en/sql-reference/functions/comparison-functions.md/#notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
@ -513,10 +513,10 @@ Indexes of type `set` can be utilized by all functions. The other index types ar
| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | ✔ |
| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |
| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#less) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#greater) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
| [has](/docs/en/sql-reference/functions/array-functions#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ | ✔ |

View File

@ -87,12 +87,18 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64
- Indices
- Replication
## PARTITION BY
## PARTITION BY {#partition-by}
`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
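As an illustration only (not taken from the original page), a sketch of a File-engine table partitioned by month; the table and column names are hypothetical:

```sql
-- Each month of data is written to a separate file.
CREATE TABLE file_by_month
(
    date Date,
    value UInt32
)
ENGINE = File(Parquet)
PARTITION BY toYYYYMM(date);
```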
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
## Settings {#settings}
- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows selecting empty data from a file that doesn't exist. Disabled by default.

View File

@ -103,6 +103,12 @@ SELECT * FROM url_engine_table
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
## Virtual Columns {#virtual-columns}
- `_path` — Path to the `URL`. Type: `LowCardinality(String)`.
- `_file` — Resource name of the `URL`. Type: `LowCardinality(String)`.
- `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows skipping empty files while reading. Disabled by default.

View File

@ -74,7 +74,7 @@ The maximum number of threads that will be used for fetching data parts from ano
Type: UInt64
Default: 8
Default: 16
## background_merges_mutations_concurrency_ratio
@ -136,7 +136,7 @@ The maximum number of threads that will be used for constantly executing some li
Type: UInt64
Default: 128
Default: 512
## backup_threads

View File

@ -4805,6 +4805,243 @@ a Tuple(
If a table has a space-filling curve in its index, e.g. `ORDER BY mortonEncode(x, y)`, and the query has conditions on its arguments, e.g. `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, use the space-filling curve for index analysis.
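A minimal sketch of a table whose sorting key is a space-filling curve, to which the analysis described above applies (names are hypothetical):

```sql
CREATE TABLE points
(
    x UInt32,
    y UInt32
)
ENGINE = MergeTree
ORDER BY mortonEncode(x, y);

-- With the setting enabled, the range conditions on x and y can be used
-- for index analysis via the Morton curve instead of a full scan.
SELECT count()
FROM points
WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30;
```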
## query_plan_enable_optimizations {#query_plan_enable_optimizations}
Toggles query optimization at the query plan level.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Possible values:
- 0 - Disable all optimizations at the query plan level
- 1 - Enable optimizations at the query plan level (but individual optimizations may still be disabled via their individual settings)
Default value: `1`.
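As a hedged illustration, one way to observe the effect of this setting is to compare query plans with it enabled and disabled (the query itself is arbitrary):

```sql
-- Plan with query-plan-level optimizations enabled (the default).
EXPLAIN PLAN SELECT number FROM numbers(10) ORDER BY number LIMIT 3;

-- Plan with all query-plan-level optimizations disabled.
EXPLAIN PLAN SELECT number FROM numbers(10) ORDER BY number LIMIT 3
SETTINGS query_plan_enable_optimizations = 0;
```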
## query_plan_max_optimizations_to_apply
Limits the total number of optimizations applied to the query plan, see setting [query_plan_enable_optimizations](#query_plan_enable_optimizations).
Useful to avoid long optimization times for complex queries.
If the actual number of optimizations exceeds this setting, an exception is thrown.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Type: [UInt64](../../sql-reference/data-types/int-uint.md).
Default value: `10000`.
## query_plan_lift_up_array_join
Toggles a query-plan-level optimization which moves ARRAY JOINs up in the execution plan.
Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Possible values:
- 0 - Disable
- 1 - Enable
Default value: `1`.
## query_plan_push_down_limit
Toggles a query-plan-level optimization which moves LIMITs down in the execution plan.
Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Possible values:
- 0 - Disable
- 1 - Enable
Default value: `1`.
## query_plan_split_filter
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Toggles a query-plan-level optimization which splits filters into expressions.
Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
Possible values:
- 0 - Disable
- 1 - Enable
Default value: `1`.
## query_plan_merge_expressions
Toggles a query-plan-level optimization which merges consecutive filters.
Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Possible values:
- 0 - Disable
- 1 - Enable
Default value: `1`.
## query_plan_filter_push_down
Toggles a query-plan-level optimization which moves filters down in the execution plan.
Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Possible values:
- 0 - Disable
- 1 - Enable
Default value: `1`.
## query_plan_execute_functions_after_sorting
Toggles a query-plan-level optimization which moves expressions after sorting steps.
Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Possible values:
- 0 - Disable
- 1 - Enable
Default value: `1`.
## query_plan_reuse_storage_ordering_for_window_functions
Toggles a query-plan-level optimization which uses storage sorting when sorting for window functions.
Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Possible values:
- 0 - Disable
- 1 - Enable
Default value: `1`.
## query_plan_lift_up_union
Toggles a query-plan-level optimization which moves larger subtrees of the query plan into union to enable further optimizations.
Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Possible values:
- 0 - Disable
- 1 - Enable
Default value: `1`.
## query_plan_distinct_in_order
Toggles the DISTINCT in-order query-plan-level optimization.
Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Possible values:
- 0 - Disable
- 1 - Enable
Default value: `1`.
## query_plan_read_in_order
Toggles the read-in-order query-plan-level optimization.
Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Possible values:
- 0 - Disable
- 1 - Enable
Default value: `1`.
## query_plan_aggregation_in_order
Toggles the aggregation in-order query-plan-level optimization.
Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Possible values:
- 0 - Disable
- 1 - Enable
Default value: `0`.
## query_plan_remove_redundant_sorting
Toggles a query-plan-level optimization which removes redundant sorting steps, e.g. in subqueries.
Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Possible values:
- 0 - Disable
- 1 - Enable
Default value: `1`.
## query_plan_remove_redundant_distinct
Toggles a query-plan-level optimization which removes redundant DISTINCT steps.
Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
:::note
This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
:::
Possible values:
- 0 - Disable
- 1 - Enable
Default value: `1`.
## dictionary_use_async_executor {#dictionary_use_async_executor}
Executes a pipeline for reading the dictionary source in several threads. It is only supported by dictionaries with a local CLICKHOUSE source.

View File

@ -14,6 +14,7 @@ Columns:
- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Database UUID.
- `comment` ([String](../../sql-reference/data-types/enum.md)) — Database comment.
- `engine_full` ([String](../../sql-reference/data-types/enum.md)) — Parameters of the database engine.
- `database` ([String](../../sql-reference/data-types/string.md)) — Alias for `name`.
The `name` column from this system table is used for implementing the `SHOW DATABASES` query.
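For example, the alias can be used interchangeably with `name` (a minimal sketch):

```sql
-- `database` is an alias for `name`, so both columns return the same values.
SELECT name, database
FROM system.databases
WHERE name = 'system';
```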

View File

@ -56,7 +56,7 @@ Functions:
## Related content
- [Reducing ClickHouse Storage Cost with the Low Cardinality Type Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/)
- [Reducing ClickHouse Storage Cost with the Low Cardinality Type Lessons from an Instana Engineer](https://altinity.com/blog/2020-5-20-reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer)
- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf)
- Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema)
- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)

View File

@ -20,7 +20,7 @@ Strings are compared byte-by-byte. Note that this may lead to unexpected results
A string S1 which has another string S2 as prefix is considered longer than S2.
## equals, `=`, `==` operators
## equals, `=`, `==` operators {#equals}
**Syntax**
@ -32,7 +32,7 @@ Alias:
- `a = b` (operator)
- `a == b` (operator)
## notEquals, `!=`, `<>` operators
## notEquals, `!=`, `<>` operators {#notequals}
**Syntax**
@ -44,7 +44,7 @@ Alias:
- `a != b` (operator)
- `a <> b` (operator)
## less, `<` operator
## less, `<` operator {#less}
**Syntax**
@ -55,7 +55,7 @@ less(a, b)
Alias:
- `a < b` (operator)
## greater, `>` operator
## greater, `>` operator {#greater}
**Syntax**
@ -66,7 +66,7 @@ greater(a, b)
Alias:
- `a > b` (operator)
## lessOrEquals, `<=` operator
## lessOrEquals, `<=` operator {#lessorequals}
**Syntax**
@ -77,7 +77,7 @@ lessOrEquals(a, b)
Alias:
- `a <= b` (operator)
## greaterOrEquals, `>=` operator
## greaterOrEquals, `>=` operator {#greaterorequals}
**Syntax**

View File

@ -2533,13 +2533,14 @@ formatDateTime(Time, Format[, Timezone])
Returns time and date values according to the determined format.
**Replacement fields**
Using replacement fields, you can define a pattern for the resulting string. The “Example” column shows the formatting result for `2018-01-02 22:33:44`.
| Placeholder | Description | Example |
| Placeholder | Description | Example |
|----------|---------------------------------------------------------|------------|
| %a | abbreviated weekday name (Mon-Sun) | Mon |
| %b | abbreviated month name (Jan-Dec) | Jan |
| %c | month as an integer number (01-12) | 01 |
| %c | month as an integer number (01-12), see 'Note 3' below | 01 |
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
@ -2553,8 +2554,8 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %i | minute (00-59) | 33 |
| %I | hour in 12h format (01-12) | 10 |
| %j | day of the year (001-366) | 002 |
| %k | hour in 24h format (00-23) | 22 |
| %l | hour in 12h format (01-12) | 09 |
| %k | hour in 24h format (00-23), see 'Note 3' below | 14 |
| %l | hour in 12h format (01-12), see 'Note 3' below | 09 |
| %m | month as an integer number (01-12) | 01 |
| %M | full month name (January-December), see 'Note 2' below | January |
| %n | new-line character () | |
@ -2579,6 +2580,8 @@ Note 1: In ClickHouse versions earlier than v23.4, `%f` prints a single zero (0)
Note 2: In ClickHouse versions earlier than v23.4, `%M` prints the minute (00-59) instead of the full month name (January-December). The previous behavior can be restored using setting `formatdatetime_parsedatetime_m_is_month_name = 0`.
Note 3: In ClickHouse versions earlier than v23.11, function `parseDateTime()` required leading zeros for formatters `%c` (month) and `%l`/`%k` (hour), e.g. `07`. In later versions, the leading zero may be omitted, e.g. `7`. The previous behavior can be restored using setting `parsedatetime_parse_without_leading_zeros = 0`. Note that function `formatDateTime()` by default still prints leading zeros for `%c` and `%l`/`%k` to not break existing use cases. This behavior can be changed by setting `formatdatetime_format_without_leading_zeros = 1`.
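A minimal sketch of the leading-zero behavior described in Note 3 (the expected output in the comments is illustrative, not taken from the original page):

```sql
-- By default, %c and %k are printed with leading zeros: '01 07'.
SELECT formatDateTime(toDateTime('2018-01-02 07:33:44'), '%c %k');

-- With the setting enabled, the leading zeros are omitted: '1 7'.
SELECT formatDateTime(toDateTime('2018-01-02 07:33:44'), '%c %k')
SETTINGS formatdatetime_format_without_leading_zeros = 1;
```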
**Example**
``` sql

View File

@ -164,7 +164,7 @@ Consider a list of contacts that may specify multiple ways to contact a customer
└──────────┴──────┴───────────┴───────────┘
```
The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32`, so it needs to be converted to `String`.
The `mail` and `phone` fields are of type String, but the `telegram` field is `UInt32`, so it needs to be converted to `String`.
Get the first available contact method for the customer from the contact list:
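The example query itself is cut off by this diff; a minimal sketch of what such a query could look like (table and column names are hypothetical, with `telegram` cast to a nullable string so it can participate in `coalesce`):

```sql
SELECT coalesce(mail, phone, CAST(telegram, 'Nullable(String)')) AS contact
FROM contacts;
```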

View File

@ -6,11 +6,9 @@ sidebar_label: Mathematical
# Mathematical Functions
All the functions return a Float64 number. Results are generally as close to the actual result as possible, but in some cases less precise than the machine-representable number.
## e
Returns e.
Returns e ([Euler's number](https://en.wikipedia.org/wiki/E_%28mathematical_constant%29)).
**Syntax**
@ -18,15 +16,22 @@ Returns e.
e()
```
**Returned value**
Type: [Float64](../../sql-reference/data-types/float.md).
## pi
Returns π.
Returns π ([Pi](https://en.wikipedia.org/wiki/Pi)).
**Syntax**
```sql
pi()
```
**Returned value**
Type: [Float64](../../sql-reference/data-types/float.md).
## exp
@ -38,6 +43,14 @@ Returns e to the power of the given argument.
exp(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## log
Returns the natural logarithm of the argument.
@ -50,6 +63,14 @@ log(x)
Alias: `ln(x)`
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## exp2
Returns 2 to the power of the given argument
@ -60,6 +81,14 @@ Returns 2 to the power of the given argument
exp2(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## intExp2
Like `exp2` but returns a UInt64.
@ -80,6 +109,14 @@ Returns the binary logarithm of the argument.
log2(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## exp10
Returns 10 to the power of the given argument.
@ -90,6 +127,14 @@ Returns 10 to the power of the given argument.
exp10(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## intExp10
Like `exp10` but returns a UInt64.
@ -110,6 +155,14 @@ Returns the decimal logarithm of the argument.
log10(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## sqrt
Returns the square root of the argument.
@ -118,6 +171,14 @@ Returns the square root of the argument.
sqrt(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## cbrt
Returns the cubic root of the argument.
@ -126,6 +187,14 @@ Returns the cubic root of the argument.
cbrt(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## erf
If `x` is non-negative, then `erf(x / σ√2)` is the probability that a random variable having a normal distribution with standard deviation `σ` takes the value that is separated from the expected value by more than `x`.
@ -136,6 +205,14 @@ If `x` is non-negative, then `erf(x / σ√2)` is the probability that a random
erf(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
**Example**
(three sigma rule)
@ -160,6 +237,14 @@ Returns a number close to `1 - erf(x)` without loss of precision for large x
erfc(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## lgamma
Returns the logarithm of the gamma function.
@ -170,6 +255,14 @@ Returns the logarithm of the gamma function.
lgamma(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## tgamma
Returns the gamma function.
@ -180,6 +273,14 @@ Returns the gamma function.
gamma(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## sin
Returns the sine of the argument
@ -190,6 +291,14 @@ Returns the sine of the argument
sin(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## cos
Returns the cosine of the argument.
@ -200,6 +309,14 @@ Returns the cosine of the argument.
cos(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## tan
Returns the tangent of the argument.
@ -210,6 +327,14 @@ Returns the tangent of the argument.
tan(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## asin
Returns the arc sine of the argument.
@ -220,6 +345,14 @@ Returns the arc sine of the argument.
asin(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## acos
Returns the arc cosine of the argument.
@ -230,6 +363,14 @@ Returns the arc cosine of the argument.
acos(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## atan
Returns the arc tangent of the argument.
@ -240,6 +381,14 @@ Returns the arc tangent of the argument.
atan(x)
```
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
**Returned value**
Type: [Float*](../../sql-reference/data-types/float.md).
## pow
Returns `x` to the power of `y`.
@ -252,6 +401,15 @@ pow(x, y)
Alias: `power(x, y)`
**Arguments**
- `x` - [(U)Int8/16/32/64](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md)
- `y` - [(U)Int8/16/32/64](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md)
**Returned value**
Type: [Float64](../../sql-reference/data-types/float.md).
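A quick usage sketch combining a few of the functions above (illustrative only; results are floating-point):

```sql
-- pow(2, 10) = 1024, log(e()) = 1, sqrt(2) ≈ 1.4142135623730951
SELECT pow(2, 10), log(e()), sqrt(2);
```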
## cosh
Returns the [hyperbolic cosine](https://in.mathworks.com/help/matlab/ref/cosh.html) of the argument.

View File

@ -67,7 +67,45 @@ WHERE macro = 'test';
│ test │ Value │
└───────┴──────────────┘
```
## getClientHTTPHeader
Returns the value of the specified HTTP header. If there is no such header or the request method is not HTTP, an exception is thrown.
**Syntax**
```sql
getClientHTTPHeader(name);
```
**Arguments**
- `name` — HTTP header name. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
Value of the specified header.
Type: [String](../../sql-reference/data-types/string.md#string).
When this function is executed with `clickhouse-client`, it always returns an empty string, because the client doesn't use the HTTP protocol.
```sql
SELECT getClientHTTPHeader('test')
```
Result:
```text
┌─getClientHTTPHeader('test')─┐
│ │
└─────────────────────────────┘
```
Try using an HTTP request:
```shell
echo "select getClientHTTPHeader('X-Clickhouse-User')" | curl -H 'X-ClickHouse-User: default' -H 'X-ClickHouse-Key: ' 'http://localhost:8123/' -d @-
#result
default
```
## FQDN
Returns the fully qualified domain name of the ClickHouse server.

View File

@ -67,6 +67,12 @@ SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountNam
└─────────┘
```
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
**See Also**
- [AzureBlobStorage Table Engine](/docs/en/engines/table-engines/integrations/azureBlobStorage.md)

View File

@ -191,12 +191,13 @@ Query the total number of rows from all files `file002` inside any folder in dir
SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt32');
```
## Virtual Columns
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
## Settings
## Settings {#settings}
- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows selecting empty data from a file that doesn't exist. Disabled by default.
- [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows truncating the file before inserting into it. Disabled by default.

View File

@ -0,0 +1,86 @@
---
slug: /en/sql-reference/table-functions/fuzzJSON
sidebar_position: 75
sidebar_label: fuzzJSON
---
# fuzzJSON
Perturbs a JSON string with random variations.
``` sql
fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] })
```
**Arguments**
- `named_collection` - A [NAMED COLLECTION](/docs/en/sql-reference/statements/create/named-collection.md).
- `option=value` - Named collection optional parameters and their values.
- `json_str` (String) - The source string representing structured data in JSON format.
- `random_seed` (UInt64) - Manual random seed for producing stable results.
- `reuse_output` (boolean) - Reuse the output from a fuzzing process as input for the next fuzzer.
- `max_output_length` (UInt64) - Maximum allowable length of the generated or perturbed JSON string.
- `probability` (Float64) - The probability to fuzz a JSON field (a key-value pair). Must be within [0, 1] range.
- `max_nesting_level` (UInt64) - The maximum allowed depth of nested structures within the JSON data.
- `max_array_size` (UInt64) - The maximum allowed size of a JSON array.
- `max_object_size` (UInt64) - The maximum allowed number of fields on a single level of a JSON object.
- `max_string_value_length` (UInt64) - The maximum length of a String value.
- `min_key_length` (UInt64) - The minimum key length. Should be at least 1.
- `max_key_length` (UInt64) - The maximum key length. Should be greater than or equal to `min_key_length`, if specified.
**Returned Value**
A table object with a single column containing perturbed JSON strings.
## Usage Example
``` sql
CREATE NAMED COLLECTION json_fuzzer AS json_str='{}';
SELECT * FROM fuzzJSON(json_fuzzer) LIMIT 3;
```
``` text
{"52Xz2Zd4vKNcuP2":true}
{"UPbOhOQAdPKIg91":3405264103600403024}
{"X0QUWu8yT":[]}
```
``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"name" : "value"}', random_seed=1234) LIMIT 3;
```
``` text
{"key":"value", "mxPG0h1R5":"L-YQLv@9hcZbOIGrAn10%GA"}
{"BRE3":true}
{"key":"value", "SWzJdEJZ04nrpSfy":[{"3Q23y":[]}]}
```
``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"students" : ["Alice", "Bob"]}', reuse_output=true) LIMIT 3;
```
``` text
{"students":["Alice", "Bob"], "nwALnRMc4pyKD9Krv":[]}
{"students":["1rNY5ZNs0wU&82t_P", "Bob"], "wLNRGzwDiMKdw":[{}]}
{"xeEk":["1rNY5ZNs0wU&82t_P", "Bob"], "wLNRGzwDiMKdw":[{}, {}]}
```
``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"students" : ["Alice", "Bob"]}', max_output_length=512) LIMIT 3;
```
``` text
{"students":["Alice", "Bob"], "BREhhXj5":true}
{"NyEsSWzJdeJZ04s":["Alice", 5737924650575683711, 5346334167565345826], "BjVO2X9L":true}
{"NyEsSWzJdeJZ04s":["Alice", 5737924650575683711, 5346334167565345826], "BjVO2X9L":true, "k1SXzbSIz":[{}]}
```
``` sql
SELECT * FROM fuzzJSON('{"id":1}', 1234) LIMIT 3;
```
``` text
{"id":1, "mxPG0h1R5":"L-YQLv@9hcZbOIGrAn10%GA"}
{"BRjE":16137826149911306846}
{"XjKE":15076727133550123563}
```

View File

@ -94,8 +94,9 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
## Virtual Columns
- `_path` — Path to the file.
- `_file` — Name of the file.
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}

View File

@ -228,6 +228,12 @@ FROM s3(
LIMIT 5;
```
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows truncating the file before inserting into it. Disabled by default.

View File

@ -50,8 +50,9 @@ Character `|` inside patterns is used to specify failover addresses. They are it
## Virtual Columns
- `_path` — Path to the `URL`.
- `_file` — Resource name of the `URL`.
- `_path` — Path to the `URL`. Type: `LowCardinality(String)`.
- `_file` — Resource name of the `URL`. Type: `LowCardinality(String)`.
- `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}

View File

@ -337,7 +337,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`.
The filter can be used by the functions: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions.md#hasany), [hasAll](../../../sql-reference/functions/array-functions.md#hasall).
The filter can be used by the functions: [equals](../../../sql-reference/functions/comparison-functions.md#equals), [notEquals](../../../sql-reference/functions/comparison-functions.md#notequals), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions.md#hasany), [hasAll](../../../sql-reference/functions/array-functions.md#hasall).
**Examples**
@ -354,8 +354,8 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT
| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter |
|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, &lt;&gt;)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, &lt;&gt;)](../../../sql-reference/functions/comparison-functions.md#notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
@ -363,10 +363,10 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT
| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [less (\<)](../../../sql-reference/functions/comparison-functions.md#less) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |

View File

@ -994,7 +994,7 @@ ClickHouse использует потоки из глобального пул
- A positive integer.
Default value: 128.
Default value: 512.
## background_fetches_pool_size {#background_fetches_pool_size}
@ -1004,7 +1004,7 @@ ClickHouse использует потоки из глобального пул
- A positive integer.
Default value: 8.
Default value: 16.
## background_distributed_schedule_pool_size {#background_distributed_schedule_pool_size}

View File

@ -23,14 +23,14 @@ sidebar_label: "Функции сравнения"
Note: before version 1.1.54134, signed and unsigned integers were compared the same way as in C++, so you could get an incorrect result in cases like SELECT 9223372036854775807 \> -1. Since version 1.1.54134 the behavior changed and is mathematically correct.
## equals, operators a = b and a == b {#function-equals}
## equals, operators a = b and a == b {#equals}
## notEquals, operators a != b and a `<>` b {#function-notequals}
## notEquals, operators a != b and a `<>` b {#notequals}
## less, operator `<` {#function-less}
## less, operator `<` {#less}
## greater, operator `>` {#function-greater}
## greater, operator `>` {#greater}
## lessOrEquals, operator `<=` {#function-lessorequals}
## lessOrEquals, operator `<=` {#lessorequals}
## greaterOrEquals, operator `>=` {#function-greaterorequals}
## greaterOrEquals, operator `>=` {#greaterorequals}

View File

@ -349,8 +349,8 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达
| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter |
| ------------------------------------------------------------ | ----------- | ------ | ---------- | ---------- | ------------ |
| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, &lt;&gt;)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, &lt;&gt;)](../../../sql-reference/functions/comparison-functions.md#notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
@ -358,10 +358,10 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达
| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [less (\<)](../../../sql-reference/functions/comparison-functions.md#less) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |

View File

@ -21,14 +21,14 @@ sidebar_label: 比较函数
Strings are compared byte by byte. A shorter string is less than all strings that begin with it and contain at least one more character.
## equals, a = b and a == b operators {#equals-a-b-and-a-b-operator}
## equals, a = b and a == b operators {#equals}
## notEquals, a != b and a &lt;&gt; b operators {#notequals-a-operator-b-and-a-b}
## notEquals, a != b and a &lt;&gt; b operators {#notequals}
## less, &lt; operator {#less-operator}
## less, &lt; operator {#less}
## greater, &gt; operator {#greater-operator}
## greater, &gt; operator {#greater}
## lessOrEquals, &lt;= operator {#lessorequals-operator}
## lessOrEquals, &lt;= operator {#lessorequals}
## greaterOrEquals, &gt;= operator {#greaterorequals-operator}
## greaterOrEquals, &gt;= operator {#greaterorequals}

View File

@ -44,6 +44,8 @@ contents:
dst: /usr/bin/clickhouse-odbc-bridge
- src: root/usr/share/bash-completion/completions
dst: /usr/share/bash-completion/completions
- src: root/usr/share/clickhouse
dst: /usr/share/clickhouse
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-common-static/AUTHORS

View File

@ -457,3 +457,10 @@ endif()
if (ENABLE_FUZZING)
add_compile_definitions(FUZZING_MODE=1)
endif ()
if (TARGET ch_contrib::protobuf)
get_property(google_proto_files TARGET ch_contrib::protobuf PROPERTY google_proto_files)
foreach (proto_file IN LISTS google_proto_files)
install(FILES ${proto_file} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse/protos/google/protobuf)
endforeach()
endif ()

View File

@ -306,6 +306,10 @@ void Client::initialize(Poco::Util::Application & self)
/// Set path for format schema files
if (config().has("format_schema_path"))
global_context->setFormatSchemaPath(fs::weakly_canonical(config().getString("format_schema_path")));
/// Set the path for google proto files
if (config().has("google_protos_path"))
global_context->setGoogleProtosPath(fs::weakly_canonical(config().getString("google_protos_path")));
}

View File

@ -37,7 +37,7 @@
<production>{display_name} \e[1;31m:)\e[0m </production> <!-- if it matched to the substring "production" in the server display name -->
</prompt_by_server_display_name>
<!--
<!--
Settings adjustable via command-line parameters
can take their defaults from that config file, see examples:
@ -58,6 +58,9 @@
The same can be done on user-level configuration, just create & adjust: ~/.clickhouse-client/config.xml
-->
<!-- Directory containing the proto files for the well-known Protobuf types.
-->
<google_protos_path>/usr/share/clickhouse/protos/</google_protos_path>
<!-- Analog of .netrc -->
<![CDATA[

View File

@ -1279,6 +1279,8 @@ try
global_context->setHTTPHeaderFilter(*config);
global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
global_context->setClientHTTPHeaderForbiddenHeaders(server_settings_.get_client_http_header_forbidden_headers);
global_context->setAllowGetHTTPHeaderFunction(server_settings_.allow_get_client_http_header);
global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);
ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited;
@ -1575,6 +1577,10 @@ try
global_context->setFormatSchemaPath(format_schema_path);
fs::create_directories(format_schema_path);
/// Set the path for google proto files
if (config().has("google_protos_path"))
global_context->setGoogleProtosPath(fs::weakly_canonical(config().getString("google_protos_path")));
/// Set path for filesystem caches
fs::path filesystem_caches_path(config().getString("filesystem_caches_path", ""));
if (!filesystem_caches_path.empty())

View File

@ -3,6 +3,7 @@
<tmp_path replace="replace">./tmp/</tmp_path>
<user_files_path replace="replace">./user_files/</user_files_path>
<format_schema_path replace="replace">./format_schemas/</format_schema_path>
<google_protos_path replace="replace">../../contrib/google-protobuf/src/</google_protos_path>
<access_control_path replace="replace">./access/</access_control_path>
<top_level_domains_path replace="replace">./top_level_domains/</top_level_domains_path>
</clickhouse>

View File

@ -1428,6 +1428,10 @@
-->
<format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
<!-- Directory containing the proto files for the well-known Protobuf types.
-->
<google_protos_path>/usr/share/clickhouse/protos/</google_protos_path>
<!-- Default query masking rules, matching lines would be replaced with something else in the logs
(both text logs and system.query_log).
name - name for the rule (optional)

View File

@ -23,3 +23,7 @@ debug = true
inherits = "release"
# We use LTO here as well to slightly decrease binary size
lto = true
[patch.crates-io]
# Ref: https://github.com/lotabout/tuikit/pull/51
tuikit = { git = "https://github.com/azat-rust/tuikit.git", rev = "e1994c0e03ff02c49cf1471f0cc3cbf185ce0104" }

View File

@ -77,7 +77,7 @@ public:
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
data(place).count += countBytesInFilter(flags);
data(place).count += countBytesInFilter(flags.data(), row_begin, row_end);
}
else
{

View File

@ -142,6 +142,7 @@ struct AggregateFunctionSumData
), addManyConditionalInternalImpl, MULTITARGET_FUNCTION_BODY((const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) /// NOLINT
{
ptr += start;
condition_map += start;
size_t count = end - start;
const auto * end_ptr = ptr + count;

View File

@ -289,15 +289,6 @@ public:
Arena * arena,
ssize_t if_argument_pos = -1) const = 0;
virtual void addBatchSinglePlaceFromInterval( /// NOLINT
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos = -1)
const = 0;
/** In addition to addBatch, this method collects multiple rows of arguments into array "places"
* as long as they are between offsets[i-1] and offsets[i]. This is used for arrayReduce and
* -Array combinator. It might also be used generally to break data dependency when array
@ -586,31 +577,6 @@ public:
}
}
void addBatchSinglePlaceFromInterval( /// NOLINT
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos = -1)
const override
{
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
for (size_t i = row_begin; i < row_end; ++i)
{
if (flags[i])
static_cast<const Derived *>(this)->add(place, columns, i, arena);
}
}
else
{
for (size_t i = row_begin; i < row_end; ++i)
static_cast<const Derived *>(this)->add(place, columns, i, arena);
}
}
void addBatchArray(
size_t row_begin,
size_t row_end,

View File

@ -1,134 +0,0 @@
#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/LambdaNode.h>
#include <Analyzer/ConstantNode.h>
namespace DB
{
namespace
{
class AnyFunctionViMoveFunctionsOutOfAnyVisitor : public InDepthQueryTreeVisitorWithContext<AnyFunctionViMoveFunctionsOutOfAnyVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<AnyFunctionViMoveFunctionsOutOfAnyVisitor>;
using Base::Base;
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_move_functions_out_of_any)
return;
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return;
/// check function is any
const auto & function_name = function_node->getFunctionName();
if (function_name != "any" && function_name != "anyLast")
return;
auto & arguments = function_node->getArguments().getNodes();
if (arguments.size() != 1)
return;
auto * inside_function_node = arguments[0]->as<FunctionNode>();
/// check argument is a function
if (!inside_function_node)
return;
/// check arguments can not contain arrayJoin or lambda
if (!canRewrite(inside_function_node))
return;
auto & inside_function_node_arguments = inside_function_node->getArguments().getNodes();
/// case any(f())
if (inside_function_node_arguments.empty())
return;
auto it = node_to_rewritten_node.find(node.get());
if (it != node_to_rewritten_node.end())
{
node = it->second;
return;
}
/// checking done, rewrite function
bool changed_argument = false;
for (auto & inside_argument : inside_function_node_arguments)
{
if (inside_argument->as<ConstantNode>()) /// skip constant node
break;
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, {inside_argument->getResultType()}, {}, properties);
auto any_function = std::make_shared<FunctionNode>(function_name);
any_function->resolveAsAggregateFunction(std::move(aggregate_function));
auto & any_function_arguments = any_function->getArguments().getNodes();
any_function_arguments.push_back(std::move(inside_argument));
inside_argument = std::move(any_function);
changed_argument = true;
}
if (changed_argument)
{
node_to_rewritten_node.emplace(node.get(), arguments[0]);
node = arguments[0];
}
}
private:
bool canRewrite(const FunctionNode * function_node)
{
for (const auto & argument : function_node->getArguments().getNodes())
{
if (argument->as<LambdaNode>())
return false;
if (const auto * inside_function = argument->as<FunctionNode>())
{
/// Function arrayJoin is special and should be skipped (think about it as
/// an aggregate function), otherwise wrong result will be produced.
/// For example:
/// SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
/// ┌─number─┬─arrayJoin(array(array(), array()))─┐
/// │ 0 │ [] │
/// │ 0 │ [] │
/// └────────┴────────────────────────────────────┘
if (inside_function->getFunctionName() == "arrayJoin")
return false;
if (!canRewrite(inside_function))
return false;
}
}
return true;
}
/// After query analysis, alias identifier will be resolved to node whose memory address is same with the original one.
/// So we can reuse the rewritten function.
std::unordered_map<IQueryTreeNode *, QueryTreeNodePtr> node_to_rewritten_node;
};
}
void MoveFunctionsOutOfAnyPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
AnyFunctionViMoveFunctionsOutOfAnyVisitor visitor(context);
visitor.visit(query_tree_node);
}
}

View File

@ -1,27 +0,0 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/** Rewrite 'any' and 'anyLast' functions pushing them inside original function.
*
* Example: SELECT any(f(x, y, g(z)));
* Result: SELECT f(any(x), any(y), g(any(z)));
*/
class MoveFunctionsOutOfAnyPass final : public IQueryTreePass
{
public:
String getName() override { return "MoveFunctionsOutOfAnyPass"; }
String getDescription() override
{
return "Rewrite 'any' and 'anyLast' functions pushing them inside original function.";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}
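For context, this is the rewrite the removed pass performed, taken from its header comment above; with the pass deleted (and `optimize_move_functions_out_of_any` made obsolete elsewhere in this commit), the first form is no longer transformed into the second:

``` sql
-- Input query:
SELECT any(f(x, y, g(z)));
-- Result of the removed rewrite:
SELECT f(any(x), any(y), g(any(z)));
```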

View File

@ -44,7 +44,6 @@
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
#include <Analyzer/Passes/ConvertQueryToCNFPass.h>
#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>
#include <Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.h>
@ -284,7 +283,6 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<CrossToInnerJoinPass>());
manager.addPass(std::make_unique<ShardNumColumnToFunctionPass>());
manager.addPass(std::make_unique<MoveFunctionsOutOfAnyPass>());
manager.addPass(std::make_unique<OptimizeDateOrDateTimeConverterWithPreimagePass>());
}

View File

@ -2566,6 +2566,14 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name)
ReadBufferFromFile in(file_name);
readStringUntilEOF(queries_from_file, in);
if (!global_context->getSettings().log_comment.changed)
{
Settings settings = global_context->getSettings();
/// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]"
settings.log_comment = fs::absolute(fs::path(file_name));
global_context->setSettings(settings);
}
return executeMultiQuery(queries_from_file);
}
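A hedged sketch of observing the new behaviour: when queries are executed from a file and `log_comment` was not set explicitly, the absolute file name should appear as the comment in `system.query_log` (the file path and client invocation are illustrative):

``` sql
-- after e.g. `clickhouse-client --queries-file /tmp/queries.sql`
SELECT query, log_comment
FROM system.query_log
WHERE log_comment = '/tmp/queries.sql' AND type = 'QueryFinish'
LIMIT 5;
```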

View File

@ -447,6 +447,17 @@ ReplxxLineReader::ReplxxLineReader(
uint32_t reverse_search = Replxx::KEY::control('R');
return rx.invoke(Replxx::ACTION::HISTORY_INCREMENTAL_SEARCH, reverse_search);
});
/// Change cursor style for overwrite mode to blinking (see console_codes(5))
rx.bind_key(Replxx::KEY::INSERT, [this](char32_t)
{
overwrite_mode = !overwrite_mode;
if (overwrite_mode)
rx.print("%s", "\033[5 q");
else
rx.print("%s", "\033[0 q");
return rx.invoke(Replxx::ACTION::TOGGLE_OVERWRITE_MODE, 0);
});
}
ReplxxLineReader::~ReplxxLineReader()

View File

@ -41,6 +41,7 @@ private:
bool bracketed_paste_enabled = false;
std::string editor;
bool overwrite_mode = false;
};
}

View File

@ -291,9 +291,20 @@ void StackTrace::tryCapture()
constexpr std::pair<std::string_view, std::string_view> replacements[]
= {{"::__1", ""}, {"std::basic_string<char, std::char_traits<char>, std::allocator<char>>", "String"}};
String collapseNames(String && haystack)
// Demangle @c symbol_name if it's not from __functional header (as such functions don't provide any useful
// information but pollute stack traces).
// Replace parts from @c replacements with shorter aliases
String demangleAndCollapseNames(std::string_view file, const char * const symbol_name)
{
// TODO: surely there is a written version already for better in place search&replace
std::string_view file_copy = file;
if (auto trim_pos = file.find_last_of('/'); trim_pos != file.npos)
file_copy.remove_suffix(file.size() - trim_pos);
if (file_copy.ends_with("functional"))
return "?";
String haystack = demangle(symbol_name);
// TODO myrrc surely there is a written version already for better in place search&replace
for (auto [needle, to] : replacements)
{
size_t pos = 0;
@ -354,6 +365,7 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
DB::WriteBufferFromOwnString out;
out << i << ". ";
String file;
if (std::error_code ec; object && std::filesystem::exists(object->name, ec) && !ec)
{
auto dwarf_it = dwarfs.try_emplace(object->name, object->elf).first;
@ -361,11 +373,14 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
DB::Dwarf::LocationInfo location;
if (dwarf_it->second.findAddress(uintptr_t(physical_addr), location, mode, inline_frames))
out << location.file.toString() << ":" << location.line << ": ";
{
file = location.file.toString();
out << file << ":" << location.line << ": ";
}
}
if (const auto * const symbol = symbol_index.findSymbol(virtual_addr))
out << collapseNames(demangle(symbol->name));
out << demangleAndCollapseNames(file, symbol->name);
else
out << "?";
@ -380,13 +395,14 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
for (size_t j = 0; j < inline_frames.size(); ++j)
{
const auto & frame = inline_frames[j];
const String file_for_inline_frame = frame.location.file.toString();
callback(fmt::format(
"{}.{}. inlined from {}:{}: {}",
i,
j + 1,
frame.location.file.toString(),
file_for_inline_frame,
frame.location.line,
collapseNames(demangle(frame.name))));
demangleAndCollapseNames(file_for_inline_frame, frame.name)));
}
callback(out.str());

View File

@ -4,6 +4,7 @@
#include <cstring>
#include <optional>
#include <base/types.h>
#include <boost/core/noncopyable.hpp>
#include <Common/Exception.h>
#include <Common/levenshteinDistance.h>
#include <Poco/Net/IPAddress.h>
@ -21,7 +22,7 @@ namespace ErrorCodes
namespace
{
struct NetworkInterfaces
struct NetworkInterfaces : public boost::noncopyable
{
ifaddrs * ifaddr;
NetworkInterfaces()
@ -112,8 +113,8 @@ bool isLocalAddress(const Poco::Net::IPAddress & address)
}
}
NetworkInterfaces interfaces;
return interfaces.hasAddress(address);
static NetworkInterfaces network_interfaces;
return network_interfaces.hasAddress(address);
}

View File

@ -25,7 +25,11 @@ protected:
char * compressed_buffer = nullptr;
/// Don't checksum on decompressing.
#if defined(FUZZER)
bool disable_checksum = true;
#else
bool disable_checksum = false;
#endif
/// Allow reading data, compressed by different codecs from one file.
bool allow_different_codecs;

View File

@ -97,7 +97,7 @@ namespace Protocol
};
/// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10
/// would always be true because of compiler optimisation. That would lead to out-of-bounds error
/// would always be true because of compiler optimization. That would lead to out-of-bounds error
/// if the packet is invalid.
/// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values
inline const char * toString(UInt64 packet)

View File

@ -86,10 +86,10 @@ namespace DB
M(Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0) \
M(String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0) \
M(UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0) \
M(UInt64, background_fetches_pool_size, 8, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \
M(UInt64, background_fetches_pool_size, 16, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \
M(UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0) \
M(UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0) \
M(UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
M(UInt64, background_schedule_pool_size, 512, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
@ -98,6 +98,8 @@ namespace DB
M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
M(String, get_client_http_header_forbidden_headers, "", "Comma separated list of http header names that will not be returned by function getClientHTTPHeader.", 0) \
M(Bool, allow_get_client_http_header, false, "Allow function getClientHTTPHeader", 0) \
M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \
M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \

View File

@ -512,8 +512,10 @@ class IColumn;
M(Bool, splitby_max_substrings_includes_remaining_string, false, "Functions 'splitBy*()' with 'max_substrings' argument > 0 include the remaining string as last element in the result", 0) \
\
M(Bool, allow_execute_multiif_columnar, true, "Allow execute multiIf function columnar", 0) \
M(Bool, formatdatetime_f_prints_single_zero, false, "Formatter '%f' in function 'formatDateTime()' produces a single zero instead of six zeros if the formatted value has no fractional seconds.", 0) \
M(Bool, formatdatetime_parsedatetime_m_is_month_name, true, "Formatter '%M' in functions 'formatDateTime()' and 'parseDateTime()' produces the month name instead of minutes.", 0) \
M(Bool, formatdatetime_f_prints_single_zero, false, "Formatter '%f' in function 'formatDateTime()' prints a single zero instead of six zeros if the formatted value has no fractional seconds.", 0) \
M(Bool, formatdatetime_parsedatetime_m_is_month_name, true, "Formatter '%M' in functions 'formatDateTime()' and 'parseDateTime()' print/parse the month name instead of minutes.", 0) \
M(Bool, parsedatetime_parse_without_leading_zeros, true, "Formatters '%c', '%l' and '%k' in function 'parseDateTime()' parse months and hours without leading zeros.", 0) \
M(Bool, formatdatetime_format_without_leading_zeros, false, "Formatters '%c', '%l' and '%k' in function 'formatDateTime()' print months and hours without leading zeros.", 0) \
\
M(UInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. Throw exception if the block contains too many partitions. This setting is a safety threshold, because using large number of partitions is a common misconception.", 0) \
M(Bool, throw_on_max_partitions_per_insert_block, true, "Used with max_partitions_per_insert_block. If true (default), an exception will be thrown when max_partitions_per_insert_block is reached. If false, details of the insert query reaching this limit with the number of partitions will be logged. This can be useful if you're trying to understand the impact on users when changing max_partitions_per_insert_block.", 0) \
@ -554,7 +556,6 @@ class IColumn;
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \
M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \
M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \
M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
M(Bool, rewrite_count_distinct_if_with_count_distinct_implementation, false, "Rewrite countDistinctIf with count_distinct_implementation configuration", 0) \
@ -682,13 +683,19 @@ class IColumn;
M(Bool, optimize_group_by_constant_keys, true, "Optimize GROUP BY when all keys in block are constant", 0) \
M(Bool, legacy_column_name_of_tuple_literal, false, "List all names of element of large tuple literals in their column names instead of hash. This settings exists only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher.", 0) \
\
M(Bool, query_plan_enable_optimizations, true, "Apply optimizations to query plan", 0) \
M(Bool, query_plan_enable_optimizations, true, "Globally enable/disable query optimization at the query plan level", 0) \
M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \
M(Bool, query_plan_lift_up_array_join, true, "Allow to move array joins up in the query plan", 0) \
M(Bool, query_plan_push_down_limit, true, "Allow to move LIMITs down in the query plan", 0) \
M(Bool, query_plan_split_filter, true, "Allow to split filters in the query plan", 0) \
M(Bool, query_plan_merge_expressions, true, "Allow to merge expressions in the query plan", 0) \
M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \
M(Bool, query_plan_execute_functions_after_sorting, true, "Allow to re-order functions after sorting", 0) \
M(Bool, query_plan_reuse_storage_ordering_for_window_functions, true, "Allow to use the storage sorting for window functions", 0) \
M(Bool, query_plan_lift_up_union, true, "Allow to move UNIONs up so that more parts of the query plan can be optimized", 0) \
M(Bool, query_plan_optimize_primary_key, true, "Analyze primary key using query plan (instead of AST)", 0) \
M(Bool, query_plan_read_in_order, true, "Use query plan for read-in-order optimisation", 0) \
M(Bool, query_plan_aggregation_in_order, true, "Use query plan for aggregation-in-order optimisation", 0) \
M(Bool, query_plan_read_in_order, true, "Use query plan for read-in-order optimization", 0) \
M(Bool, query_plan_aggregation_in_order, true, "Use query plan for aggregation-in-order optimization", 0) \
M(Bool, query_plan_remove_redundant_sorting, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries", 0) \
M(Bool, query_plan_remove_redundant_distinct, true, "Remove redundant Distinct step in query plan", 0) \
M(Bool, query_plan_enable_multithreading_after_window_functions, true, "Enable multithreading after evaluating window functions to allow parallel stream processing", 0) \
@ -887,6 +894,7 @@ class IColumn;
MAKE_OBSOLETE(M, UInt64, parallel_replicas_min_number_of_granules_to_enable, 0) \
MAKE_OBSOLETE(M, Bool, query_plan_optimize_projection, true) \
MAKE_OBSOLETE(M, Bool, query_cache_store_results_of_queries_with_nondeterministic_functions, false) \
MAKE_OBSOLETE(M, Bool, optimize_move_functions_out_of_any, false) \
/** The section above is for obsolete settings. Do not add anything there. */

View File

@ -124,6 +124,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}},
{"23.4", {{"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}}},
{"23.4", {{"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}},
{"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}},
{"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}},
{"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"},
{"enable_positional_arguments", false, true, "Enable positional arguments feature by default"},

View File

@ -22,6 +22,8 @@ public:
virtual ~IDataTypeCustomName() = default;
virtual String getName() const = 0;
virtual bool identical(const IDataTypeCustomName & rhs) const = 0;
};
using DataTypeCustomNamePtr = std::unique_ptr<const IDataTypeCustomName>;
@ -51,6 +53,12 @@ private:
public:
explicit DataTypeCustomFixedName(String name_) : name(name_) {}
String getName() const override { return name; }
bool identical(const IDataTypeCustomName & rhs_) const override
{
if (const auto * rhs = typeid_cast<decltype(this)>(&rhs_))
return name == rhs->getName();
return false;
}
};
}

View File

@ -167,4 +167,19 @@ void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory)
factory.registerDataTypeCustom("SimpleAggregateFunction", create);
}
bool DataTypeCustomSimpleAggregateFunction::identical(const IDataTypeCustomName & rhs_) const
{
if (const auto * rhs = typeid_cast<decltype(this)>(&rhs_))
{
if (parameters != rhs->parameters)
return false;
if (argument_types.size() != rhs->argument_types.size())
return false;
for (size_t i = 0; i < argument_types.size(); ++i)
if (!argument_types[i]->identical(*rhs->argument_types[i]))
return false;
return function->getName() == rhs->function->getName();
}
return false;
}
}

View File

@ -36,6 +36,7 @@ public:
AggregateFunctionPtr getFunction() const { return function; }
String getName() const override;
bool identical(const IDataTypeCustomName & rhs_) const override;
static void checkSupportedFunctions(const AggregateFunctionPtr & function);
};

View File

@ -72,4 +72,19 @@ DataTypePtr createNested(const DataTypes & types, const Names & names)
return DataTypeFactory::instance().getCustom(std::move(custom_desc));
}
bool DataTypeNestedCustomName::identical(const IDataTypeCustomName & rhs_) const
{
if (const auto * rhs = typeid_cast<decltype(this)>(&rhs_))
{
if (names != rhs->names)
return false;
if (elems.size() != rhs->elems.size())
return false;
for (size_t i = 0; i < elems.size(); ++i)
if (!elems[i]->identical(*rhs->elems[i]))
return false;
return true;
}
return false;
}
}

View File

@ -19,6 +19,7 @@ public:
}
String getName() const override;
bool identical(const IDataTypeCustomName & rhs_) const override;
};
DataTypePtr createNested(const DataTypes & types, const Names & names);

View File

@ -252,4 +252,17 @@ SerializationPtr IDataType::getSerialization(const NameAndTypePair & column)
return column.type->getDefaultSerialization();
}
bool IDataType::identical(const IDataType & rhs) const
{
const auto * rhs_custom_name = rhs.getCustomName();
if (custom_name && rhs_custom_name)
{
return custom_name->identical(*rhs_custom_name);
}
else if (custom_name || rhs_custom_name)
return false;
else
return equals(rhs);
}
}

View File

@ -177,6 +177,12 @@ public:
/// Checks that two instances belong to the same type
virtual bool equals(const IDataType & rhs) const = 0;
/** Checks that two types are exactly identical. Note that two types may be equal but not identical.
* For example, `SimpleAggregateFunction(max, String)` and `SimpleAggregateFunction(anyLast, String)` are equal but not identical.
* This is used when creating a replicated table: column types on different replicas must be identical.
*/
virtual bool identical(const IDataType & rhs) const;
/// Various properties on behaviour of data type.
/** The data type is dependent on parameters and types with different parameters are different.
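A sketch of the situation this check targets, with hypothetical table names and ZooKeeper paths: the two column types below are equal (both SimpleAggregateFunction over String) but not identical, so creating the second replica would be expected to fail once types are compared with identical():

``` sql
-- replica 1 (on one server)
CREATE TABLE t (v SimpleAggregateFunction(max, String))
ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/t', 'r1')
ORDER BY tuple();

-- replica 2 (on another server): equal but not identical column type
CREATE TABLE t (v SimpleAggregateFunction(anyLast, String))
ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/t', 'r2')
ORDER BY tuple();
```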

View File

@ -263,7 +263,8 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
"attnotnull AS not_null, attndims AS dims, atttypid as type_id, atttypmod as type_modifier "
"FROM pg_attribute "
"WHERE attrelid = (SELECT oid FROM pg_class WHERE {}) "
"AND NOT attisdropped AND attnum > 0", where);
"AND NOT attisdropped AND attnum > 0 "
"ORDER BY attnum ASC", where);
auto postgres_table_with_schema = postgres_schema.empty() ? postgres_table : doubleQuoteString(postgres_schema) + '.' + doubleQuoteString(postgres_table);
table.physical_columns = readNamesAndTypesList(tx, postgres_table_with_schema, query, use_nulls, false);

View File

@ -151,6 +151,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.protobuf.output_nullables_with_google_wrappers = settings.output_format_protobuf_nullables_with_google_wrappers;
format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference;
format_settings.protobuf.use_autogenerated_schema = settings.format_protobuf_use_autogenerated_schema;
format_settings.protobuf.google_protos_path = context->getGoogleProtosPath();
format_settings.regexp.escaping_rule = settings.format_regexp_escaping_rule;
format_settings.regexp.regexp = settings.format_regexp;
format_settings.regexp.skip_unmatched = settings.format_regexp_skip_unmatched;

View File

@ -295,6 +295,7 @@ struct FormatSettings
bool allow_multiple_rows_without_delimiter = false;
bool skip_fields_with_unsupported_types_in_schema_inference = false;
bool use_autogenerated_schema = true;
std::string google_protos_path;
} protobuf;
struct

View File

@ -30,11 +30,11 @@ void ProtobufSchemas::clear()
class ProtobufSchemas::ImporterWithSourceTree : public google::protobuf::compiler::MultiFileErrorCollector
{
public:
explicit ImporterWithSourceTree(const String & schema_directory, WithEnvelope with_envelope_)
: importer(&disk_source_tree, this)
, with_envelope(with_envelope_)
explicit ImporterWithSourceTree(const String & schema_directory, const String & google_protos_path, WithEnvelope with_envelope_)
: importer(&disk_source_tree, this), with_envelope(with_envelope_)
{
disk_source_tree.MapPath("", schema_directory);
disk_source_tree.MapPath("", google_protos_path);
}
~ImporterWithSourceTree() override = default;
@ -112,12 +112,17 @@ private:
};
const google::protobuf::Descriptor * ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope)
const google::protobuf::Descriptor *
ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope, const String & google_protos_path)
{
std::lock_guard lock(mutex);
auto it = importers.find(info.schemaDirectory());
if (it == importers.end())
it = importers.emplace(info.schemaDirectory(), std::make_unique<ImporterWithSourceTree>(info.schemaDirectory(), with_envelope)).first;
it = importers
.emplace(
info.schemaDirectory(),
std::make_unique<ImporterWithSourceTree>(info.schemaDirectory(), google_protos_path, with_envelope))
.first;
auto * importer = it->second.get();
return importer->import(info.schemaPath(), info.messageName());
}

View File

@ -59,7 +59,8 @@ public:
/// Parses the format schema, then parses the corresponding proto file, and returns the descriptor of the message type.
/// The function never returns nullptr, it throws an exception if it cannot load or parse the file.
const google::protobuf::Descriptor * getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope);
const google::protobuf::Descriptor *
getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope, const String & google_protos_path);
private:
class ImporterWithSourceTree;

View File

@ -195,7 +195,11 @@ struct ArrayElementNumImpl
if (index < array_size)
{
size_t j = !negative ? (current_offset + index) : (offsets[i] - index - 1);
size_t j;
if constexpr (negative)
j = offsets[i] - index - 1;
else
j = current_offset + index;
result[i] = data[j];
if (builder)
builder.update(j);
@ -260,7 +264,7 @@ struct ArrayElementNumImpl
struct ArrayElementStringImpl
{
template <bool negative>
template <bool negative, bool used_builder>
static void vectorConst(
const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets,
const ColumnArray::Offset index,
@ -269,21 +273,31 @@ struct ArrayElementStringImpl
{
size_t size = offsets.size();
result_offsets.resize(size);
result_data.reserve(data.size());
ColumnArray::Offset current_offset = 0;
ColumnArray::Offset current_result_offset = 0;
/// Compute the total result size first to reduce the cost of result_data.resize().
size_t total_result_bytes = 0;
ColumnString::Chars zero_buf(1);
zero_buf.push_back(0);
std::vector<std::pair<const ColumnString::Char *, UInt64>> selected_bufs;
selected_bufs.reserve(size);
for (size_t i = 0; i < size; ++i)
{
size_t array_size = offsets[i] - current_offset;
if (index < array_size)
{
size_t adjusted_index = !negative ? index : (array_size - index - 1);
size_t adjusted_index;
if constexpr (negative)
adjusted_index = array_size - index - 1;
else
adjusted_index = index;
size_t j = current_offset + adjusted_index;
if (builder)
if constexpr (used_builder)
{
size_t j = current_offset + adjusted_index;
builder.update(j);
}
ColumnArray::Offset string_pos = current_offset == 0 && adjusted_index == 0
? 0
@ -291,30 +305,36 @@ struct ArrayElementStringImpl
ColumnArray::Offset string_size = string_offsets[current_offset + adjusted_index] - string_pos;
result_data.resize(current_result_offset + string_size);
memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], &data[string_pos], string_size);
current_result_offset += string_size;
result_offsets[i] = current_result_offset;
total_result_bytes += string_size;
selected_bufs.emplace_back(&data[string_pos], string_size);
result_offsets[i] = total_result_bytes;
}
else
{
/// Insert an empty row.
result_data.resize(current_result_offset + 1);
result_data[current_result_offset] = 0;
current_result_offset += 1;
result_offsets[i] = current_result_offset;
total_result_bytes += 1;
selected_bufs.emplace_back(zero_buf.data(), 1);
result_offsets[i] = total_result_bytes;
if (builder)
if constexpr (used_builder)
builder.update();
}
current_offset = offsets[i];
}
ColumnArray::Offset current_result_offset = 0;
result_data.resize(total_result_bytes);
for (const auto & buf : selected_bufs)
{
memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], buf.first, buf.second);
current_result_offset += buf.second;
}
}
/** Implementation for non-constant index.
*/
template <typename TIndex>
template <typename TIndex, bool used_builder>
static void vector(
const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets,
const PaddedPODArray<TIndex> & indices,
@ -323,10 +343,14 @@ struct ArrayElementStringImpl
{
size_t size = offsets.size();
result_offsets.resize(size);
result_data.reserve(data.size());
ColumnString::Chars zero_buf(1);
zero_buf.push_back(0);
ColumnArray::Offset current_offset = 0;
ColumnArray::Offset current_result_offset = 0;
/// Compute the total result size first to reduce the cost of result_data.resize().
size_t total_result_bytes = 0;
std::vector<std::pair<const ColumnString::Char *, UInt64>> selected_bufs;
selected_bufs.reserve(size);
for (size_t i = 0; i < size; ++i)
{
size_t array_size = offsets[i] - current_offset;
@ -342,35 +366,43 @@ struct ArrayElementStringImpl
if (adjusted_index < array_size)
{
size_t j = current_offset + adjusted_index;
if (builder)
if constexpr (used_builder)
{
size_t j = current_offset + adjusted_index;
builder.update(j);
}
ColumnArray::Offset string_pos = current_offset == 0 && adjusted_index == 0
? 0
: string_offsets[current_offset + adjusted_index - 1];
ColumnArray::Offset string_size = string_offsets[current_offset + adjusted_index] - string_pos;
total_result_bytes += string_size;
selected_bufs.emplace_back(&data[string_pos], string_size);
result_data.resize(current_result_offset + string_size);
memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], &data[string_pos], string_size);
current_result_offset += string_size;
result_offsets[i] = current_result_offset;
result_offsets[i] = total_result_bytes;
}
else
{
/// Insert empty string
result_data.resize(current_result_offset + 1);
result_data[current_result_offset] = 0;
current_result_offset += 1;
result_offsets[i] = current_result_offset;
total_result_bytes += 1;
selected_bufs.emplace_back(zero_buf.data(), 1);
result_offsets[i] = total_result_bytes;
if (builder)
if constexpr (used_builder)
builder.update();
}
current_offset = offsets[i];
}
ColumnArray::Offset current_result_offset = 0;
result_data.resize(total_result_bytes);
for (const auto & buf : selected_bufs)
{
memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], buf.first, buf.second);
current_result_offset += buf.second;
}
}
};
@ -542,23 +574,47 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument
if (index.getType() == Field::Types::UInt64
|| (index.getType() == Field::Types::Int64 && index.get<Int64>() >= 0))
ArrayElementStringImpl::vectorConst<false>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
index.get<UInt64>() - 1,
col_res->getChars(),
col_res->getOffsets(),
builder);
{
if (builder)
ArrayElementStringImpl::vectorConst<false, true>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
index.get<UInt64>() - 1,
col_res->getChars(),
col_res->getOffsets(),
builder);
else
ArrayElementStringImpl::vectorConst<false, false>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
index.get<UInt64>() - 1,
col_res->getChars(),
col_res->getOffsets(),
builder);
}
else if (index.getType() == Field::Types::Int64)
ArrayElementStringImpl::vectorConst<true>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
-(UInt64(index.get<Int64>()) + 1),
col_res->getChars(),
col_res->getOffsets(),
builder);
{
if (builder)
ArrayElementStringImpl::vectorConst<true, true>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
-(UInt64(index.get<Int64>()) + 1),
col_res->getChars(),
col_res->getOffsets(),
builder);
else
ArrayElementStringImpl::vectorConst<true, false>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
-(UInt64(index.get<Int64>()) + 1),
col_res->getChars(),
col_res->getOffsets(),
builder);
}
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index");
@ -580,14 +636,25 @@ ColumnPtr FunctionArrayElement::executeString(
return nullptr;
auto col_res = ColumnString::create();
ArrayElementStringImpl::vector<IndexType>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
indices,
col_res->getChars(),
col_res->getOffsets(),
builder);
if (builder)
ArrayElementStringImpl::vector<IndexType, true>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
indices,
col_res->getChars(),
col_res->getOffsets(),
builder);
else
ArrayElementStringImpl::vector<IndexType, false>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
indices,
col_res->getChars(),
col_res->getOffsets(),
builder);
return col_res;
}

View File

@ -50,6 +50,7 @@ public:
}
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{

View File

@ -322,6 +322,18 @@ private:
return writeNumber2(dest, ToMonthImpl::execute(source, timezone));
}
size_t mysqlMonthWithoutLeadingZero(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
auto month = ToMonthImpl::execute(source, timezone);
if (month < 10)
{
dest[0] = '0' + month;
return 1;
}
else
return writeNumber2(dest, month);
}
static size_t monthOfYearText(char * dest, Time source, bool abbreviate, UInt64, UInt32, const DateLUTImpl & timezone)
{
auto month = ToMonthImpl::execute(source, timezone);
@ -404,10 +416,36 @@ private:
return writeNumber2(dest, ToHourImpl::execute(source, timezone));
}
size_t mysqlHour24WithoutLeadingZero(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
auto hour = ToHourImpl::execute(source, timezone);
if (hour < 10)
{
dest[0] = '0' + hour;
return 1;
}
else
return writeNumber2(dest, hour);
}
size_t mysqlHour12(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
auto x = ToHourImpl::execute(source, timezone);
return writeNumber2(dest, x == 0 ? 12 : (x > 12 ? x - 12 : x));
auto hour = ToHourImpl::execute(source, timezone);
hour = (hour == 0) ? 12 : (hour > 12 ? hour - 12 : hour);
return writeNumber2(dest, hour);
}
size_t mysqlHour12WithoutLeadingZero(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
auto hour = ToHourImpl::execute(source, timezone);
hour = hour == 0 ? 12 : (hour > 12 ? hour - 12 : hour);
if (hour < 10)
{
dest[0] = '0' + hour;
return 1;
}
else
return writeNumber2(dest, hour);
}
size_t mysqlMinute(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
@ -689,10 +727,11 @@ private:
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'%' must not be the last character in the format string, use '%%' instead");
}
static bool containsOnlyFixedWidthMySQLFormatters(std::string_view format, bool mysql_M_is_month_name)
static bool containsOnlyFixedWidthMySQLFormatters(std::string_view format, bool mysql_M_is_month_name, bool mysql_format_ckl_without_leading_zeros)
{
static constexpr std::array variable_width_formatter = {'W'};
static constexpr std::array variable_width_formatter_M_is_month_name = {'W', 'M'};
static constexpr std::array variable_width_formatter_leading_zeros = {'c', 'l', 'k'};
for (size_t i = 0; i < format.size(); ++i)
{
@ -708,6 +747,13 @@ private:
[&](char c){ return c == format[i + 1]; }))
return false;
}
if (mysql_format_ckl_without_leading_zeros)
{
if (std::any_of(
variable_width_formatter_leading_zeros.begin(), variable_width_formatter_leading_zeros.end(),
[&](char c){ return c == format[i + 1]; }))
return false;
}
else
{
if (std::any_of(
@ -727,6 +773,7 @@ private:
const bool mysql_M_is_month_name;
const bool mysql_f_prints_single_zero;
const bool mysql_format_ckl_without_leading_zeros;
public:
static constexpr auto name = Name::name;
@ -736,6 +783,7 @@ public:
explicit FunctionFormatDateTimeImpl(ContextPtr context)
: mysql_M_is_month_name(context->getSettings().formatdatetime_parsedatetime_m_is_month_name)
, mysql_f_prints_single_zero(context->getSettings().formatdatetime_f_prints_single_zero)
, mysql_format_ckl_without_leading_zeros(context->getSettings().formatdatetime_format_without_leading_zeros)
{
}
@ -885,7 +933,7 @@ public:
/// column rows are NOT populated with the template and left uninitialized. We run the normal instructions for formatters AND
/// instructions that copy literal characters before/between/after formatters. As a result, each byte of each result row is
/// written which is obviously slow.
bool mysql_with_only_fixed_length_formatters = (format_syntax == FormatSyntax::MySQL) ? containsOnlyFixedWidthMySQLFormatters(format, mysql_M_is_month_name) : false;
bool mysql_with_only_fixed_length_formatters = (format_syntax == FormatSyntax::MySQL) ? containsOnlyFixedWidthMySQLFormatters(format, mysql_M_is_month_name, mysql_format_ckl_without_leading_zeros) : false;
using T = typename InstructionValueTypeMap<DataType>::InstructionValueType;
std::vector<Instruction<T>> instructions;
@ -1077,12 +1125,22 @@ public:
break;
}
// Month as an integer number (01-12)
// Month as an integer number:
// - if formatdatetime_format_without_leading_zeros = true: prints without leading zero, i.e. 1-12
// - otherwise: print with leading zeros: i.e. 01-12
case 'c':
{
Instruction<T> instruction;
instruction.setMysqlFunc(&Instruction<T>::mysqlMonth);
instructions.push_back(std::move(instruction));
if (mysql_format_ckl_without_leading_zeros)
{
instruction.setMysqlFunc(&Instruction<T>::mysqlMonthWithoutLeadingZero);
instructions.push_back(std::move(instruction));
}
else
{
instruction.setMysqlFunc(&Instruction<T>::mysqlMonth);
instructions.push_back(std::move(instruction));
}
out_template += "00";
break;
}
@ -1391,20 +1449,30 @@ public:
break;
}
// Hour in 24h format (00-23)
// Hour in 24h format:
// - if formatdatetime_format_without_leading_zeros = true: prints without leading zero, i.e. 0-23
// - otherwise: print with leading zeros: i.e. 00-23
case 'k':
{
static constexpr std::string_view val = "00";
add_time_instruction(&Instruction<T>::mysqlHour24, val);
if (mysql_format_ckl_without_leading_zeros)
add_time_instruction(&Instruction<T>::mysqlHour24WithoutLeadingZero, val);
else
add_time_instruction(&Instruction<T>::mysqlHour24, val);
out_template += val;
break;
}
// Hour in 12h format (01-12)
// Hour in 12h format:
// - if formatdatetime_format_without_leading_zeros = true: prints without leading zero, i.e. 1-12
// - otherwise: print with leading zeros: i.e. 01-12
case 'l':
{
static constexpr std::string_view val = "12";
add_time_instruction(&Instruction<T>::mysqlHour12, val);
if (mysql_format_ckl_without_leading_zeros)
add_time_instruction(&Instruction<T>::mysqlHour12WithoutLeadingZero, val);
else
add_time_instruction(&Instruction<T>::mysqlHour12, val);
out_template += val;
break;
}
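A hedged sketch of what the new `formatdatetime_format_without_leading_zeros` setting changes for '%c', '%k' and '%l', based on the instructions above (outputs are what the code implies, not captured from a real run):

``` sql
SELECT formatDateTime(toDateTime('2023-01-05 07:00:00'), '%c %k %l');
-- default:                '01 07 07'
SELECT formatDateTime(toDateTime('2023-01-05 07:00:00'), '%c %k %l')
SETTINGS formatdatetime_format_without_leading_zeros = 1;
-- without leading zeros:  '1 7 7'
```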

View File

@ -0,0 +1,116 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Interpreters/Context.h>
#include <Common/CurrentThread.h>
#include "Disks/DiskType.h"
#include "Interpreters/Context_fwd.h"
#include <Core/Field.h>
#include <Poco/Net/NameValueCollection.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int FUNCTION_NOT_ALLOWED;
extern const int BAD_ARGUMENTS;
}
namespace
{
/** Get the value of a given header from the HTTP request.
* If there is no such header, the function throws an exception.
* If the query did not arrive over HTTP (GET or POST), a default value is returned.
*/
class FunctionGetClientHTTPHeader : public IFunction, WithContext
{
private:
public:
explicit FunctionGetClientHTTPHeader(ContextPtr context_): WithContext(context_) {}
static constexpr auto name = "getClientHTTPHeader";
static FunctionPtr create(ContextPtr context_)
{
return std::make_shared<FunctionGetClientHTTPHeader>(context_);
}
bool useDefaultImplementationForConstants() const override { return true; }
String getName() const override { return name; }
bool isDeterministic() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override
{
return 1;
}
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!getContext()->allowGetHTTPHeaderFunction())
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "The function {} is not enabled; set allow_get_client_http_header in the config file to enable it.", getName());
if (!isString(arguments[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The argument of function {} must have String type", getName());
return std::make_shared<DataTypeString>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
const auto & client_info = getContext()->getClientInfo();
const auto & method = client_info.http_method;
const auto & headers = client_info.headers;
const IColumn * arg_column = arguments[0].column.get();
const ColumnString * arg_string = checkAndGetColumn<ColumnString>(arg_column);
if (!arg_string)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The argument of function {} must be a String column", getName());
if (method != ClientInfo::HTTPMethod::GET && method != ClientInfo::HTTPMethod::POST)
return result_type->createColumnConstWithDefaultValue(input_rows_count);
auto result_column = ColumnString::create();
const String default_value;
const std::unordered_set<String> & forbidden_header_list = getContext()->getClientHTTPHeaderForbiddenHeaders();
for (size_t row = 0; row < input_rows_count; ++row)
{
auto header_name = arg_string->getDataAt(row).toString();
if (!headers.has(header_name))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Header {} is not present in the HTTP request headers.", header_name);
else
{
auto it = forbidden_header_list.find(header_name);
if (it != forbidden_header_list.end())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The header {} is listed in get_client_http_header_forbidden_headers; adjust that list in the config file to allow it.", header_name);
const String & value = headers[header_name];
result_column->insertData(value.data(), value.size());
}
}
return result_column;
}
};
}
REGISTER_FUNCTION(GetHttpHeader)
{
factory.registerFunction<FunctionGetClientHTTPHeader>();
}
}
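For illustration only (not part of the diff): the per-row guard logic of getClientHTTPHeader above, reduced to a standalone helper. A plain map and set are assumed here instead of Poco::Net::NameValueCollection and the Context accessors.
#include <optional>
#include <string>
#include <unordered_map>
#include <unordered_set>

/// Illustrative sketch: reject headers listed in
/// get_client_http_header_forbidden_headers, otherwise return the header value
/// if the request carried it (the real function throws BAD_ARGUMENTS in both
/// failure cases instead of returning nullopt).
std::optional<std::string> lookupClientHeader(
    const std::unordered_map<std::string, std::string> & headers,
    const std::unordered_set<std::string> & forbidden,
    const std::string & name)
{
    if (forbidden.contains(name))
        return std::nullopt;
    auto it = headers.find(name);
    if (it == headers.end())
        return std::nullopt;
    return it->second;
}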

View File

@ -35,7 +35,7 @@ namespace ErrorCodes
* https://github.com/sphinxsearch/sphinx/blob/409f2c2b5b2ff70b04e38f92b6b1a890326bad65/src/sphinxexpr.cpp#L3825.
* Andrey Aksenov, the author of original code, permitted to use this code in ClickHouse under the Apache 2.0 license.
* Presentation about this code from Highload++ Siberia 2019 is here https://github.com/ClickHouse/ClickHouse/files/3324740/1_._._GEODIST_._.pdf
* The main idea of this implementation is optimisations based on Taylor series, trigonometric identity
* The main idea of this implementation is optimizations based on Taylor series, trigonometric identity
 * and constants calculated once for cosine and arcsine(sqrt), and a lookup table.
*/

View File

@ -466,12 +466,14 @@ namespace
{
public:
const bool mysql_M_is_month_name;
const bool mysql_parse_ckl_without_leading_zeros;
static constexpr auto name = Name::name;
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionParseDateTimeImpl>(context); }
explicit FunctionParseDateTimeImpl(ContextPtr context)
: mysql_M_is_month_name(context->getSettings().formatdatetime_parsedatetime_m_is_month_name)
, mysql_parse_ckl_without_leading_zeros(context->getSettings().parsedatetime_parse_without_leading_zeros)
{
}
@ -835,6 +837,14 @@ namespace
return cur;
}
static Pos mysqlMonthWithoutLeadingZero(Pos cur, Pos end, const String & fragment, DateTime & date)
{
Int32 month;
cur = readNumberWithVariableLength(cur, end, false, false, false, 1, 2, fragment, month);
date.setMonth(month);
return cur;
}
static Pos mysqlCentury(Pos cur, Pos end, const String & fragment, DateTime & date)
{
Int32 century;
@ -1131,6 +1141,14 @@ namespace
return cur;
}
static Pos mysqlHour12WithoutLeadingZero(Pos cur, Pos end, const String & fragment, DateTime & date)
{
Int32 hour;
cur = readNumberWithVariableLength(cur, end, false, false, false, 1, 2, fragment, hour);
date.setHour(hour, true, true);
return cur;
}
static Pos mysqlHour24(Pos cur, Pos end, const String & fragment, DateTime & date)
{
Int32 hour;
@ -1139,6 +1157,14 @@ namespace
return cur;
}
static Pos mysqlHour24WithoutLeadingZero(Pos cur, Pos end, const String & fragment, DateTime & date)
{
Int32 hour;
cur = readNumberWithVariableLength(cur, end, false, false, false, 1, 2, fragment, hour);
date.setHour(hour, false, false);
return cur;
}
static Pos readNumberWithVariableLength(
Pos cur,
Pos end,
@ -1490,9 +1516,14 @@ namespace
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonthOfYearTextShort));
break;
// Month as a decimal number (01-12)
// Month as a decimal number:
// - if parsedatetime_parse_without_leading_zeros = true: possibly without leading zero, i.e. 1-12
// - else: with leading zero required, i.e. 01-12
case 'c':
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonth));
if (mysql_parse_ckl_without_leading_zeros)
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonthWithoutLeadingZero));
else
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonth));
break;
// Year, divided by 100, zero-padded
@ -1645,14 +1676,24 @@ namespace
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12));
break;
// Hour in 24h format (00-23)
// Hour in 24h format:
// - if parsedatetime_parse_without_leading_zeros = true: possibly without leading zero, i.e. 0-23
// - else: with leading zero required, i.e. 00-23
case 'k':
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour24));
if (mysql_parse_ckl_without_leading_zeros)
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour24WithoutLeadingZero));
else
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour24));
break;
// Hour in 12h format (01-12)
// Hour in 12h format:
// - if parsedatetime_parse_without_leading_zeros = true: possibly without leading zero, i.e. 1-12
// - else: with leading zero required, i.e. 01-12
case 'l':
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12));
if (mysql_parse_ckl_without_leading_zeros)
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12WithoutLeadingZero));
else
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12));
break;
case 't':

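Both new *WithoutLeadingZero parsers call readNumberWithVariableLength with a minimum length of 1 instead of 2. Below is a simplified, standalone sketch of what such a 1-to-2-digit read amounts to; it is illustrative only, and the error handling and range checks of the real readNumberWithVariableLength are omitted.
#include <cctype>

/// Illustrative sketch: consume one or two decimal digits, so that both "3"
/// and "03" are accepted, e.g. when parsedatetime_parse_without_leading_zeros
/// is enabled.
const char * readOneOrTwoDigits(const char * cur, const char * end, int & value)
{
    value = 0;
    int consumed = 0;
    while (cur < end && consumed < 2 && std::isdigit(static_cast<unsigned char>(*cur)))
    {
        value = value * 10 + (*cur - '0');
        ++cur;
        ++consumed;
    }
    /// The real code throws if no digit was consumed or the value is out of range.
    return cur;
}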
View File

@ -196,7 +196,7 @@ struct PoissonDistribution
* Accepts only constant arguments
 * Similar to the functions rand and rand64, an additional 'tag' argument could be added to the
 * end of the arguments list (this argument will be ignored), which guarantees that functions are not stuck together
* during optimisations.
* during optimizations.
* Example: SELECT randNormal(0, 1, 1), randNormal(0, 1, 2) FROM numbers(10)
* This query will return two different columns
*/

View File

@ -103,6 +103,20 @@ void verifyClientConfiguration(const Aws::Client::ClientConfiguration & client_c
assert_cast<const Client::RetryStrategy &>(*client_config.retryStrategy);
}
void addAdditionalAMZHeadersToCanonicalHeadersList(
Aws::AmazonWebServiceRequest & request,
const HTTPHeaderEntries & extra_headers
)
{
for (const auto & [name, value] : extra_headers)
{
if (name.starts_with("x-amz-"))
{
request.SetAdditionalCustomHeaderValue(name, value);
}
}
}
}
std::unique_ptr<Client> Client::create(
@ -265,12 +279,14 @@ template void Client::setKMSHeaders<CreateMultipartUploadRequest>(CreateMultipar
template void Client::setKMSHeaders<CopyObjectRequest>(CopyObjectRequest & request) const;
template void Client::setKMSHeaders<PutObjectRequest>(PutObjectRequest & request) const;
Model::HeadObjectOutcome Client::HeadObject(const HeadObjectRequest & request) const
Model::HeadObjectOutcome Client::HeadObject(HeadObjectRequest & request) const
{
const auto & bucket = request.GetBucket();
request.setApiMode(api_mode);
addAdditionalAMZHeadersToCanonicalHeadersList(request, client_configuration.extra_headers);
if (auto region = getRegionForBucket(bucket); !region.empty())
{
if (!detect_region)
@ -346,36 +362,36 @@ Model::HeadObjectOutcome Client::HeadObject(const HeadObjectRequest & request) c
/// For each request, we wrap the request functions from Aws::S3::Client with doRequest.
/// doRequest calls the virtual function from Aws::S3::Client, while DB::S3::Client has no virtual calls for each request type.
Model::ListObjectsV2Outcome Client::ListObjectsV2(const ListObjectsV2Request & request) const
Model::ListObjectsV2Outcome Client::ListObjectsV2(ListObjectsV2Request & request) const
{
return doRequestWithRetryNetworkErrors</*IsReadMethod*/ true>(
request, [this](const Model::ListObjectsV2Request & req) { return ListObjectsV2(req); });
}
Model::ListObjectsOutcome Client::ListObjects(const ListObjectsRequest & request) const
Model::ListObjectsOutcome Client::ListObjects(ListObjectsRequest & request) const
{
return doRequestWithRetryNetworkErrors</*IsReadMethod*/ true>(
request, [this](const Model::ListObjectsRequest & req) { return ListObjects(req); });
}
Model::GetObjectOutcome Client::GetObject(const GetObjectRequest & request) const
Model::GetObjectOutcome Client::GetObject(GetObjectRequest & request) const
{
return doRequest(request, [this](const Model::GetObjectRequest & req) { return GetObject(req); });
}
Model::AbortMultipartUploadOutcome Client::AbortMultipartUpload(const AbortMultipartUploadRequest & request) const
Model::AbortMultipartUploadOutcome Client::AbortMultipartUpload(AbortMultipartUploadRequest & request) const
{
return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>(
request, [this](const Model::AbortMultipartUploadRequest & req) { return AbortMultipartUpload(req); });
}
Model::CreateMultipartUploadOutcome Client::CreateMultipartUpload(const CreateMultipartUploadRequest & request) const
Model::CreateMultipartUploadOutcome Client::CreateMultipartUpload(CreateMultipartUploadRequest & request) const
{
return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>(
request, [this](const Model::CreateMultipartUploadRequest & req) { return CreateMultipartUpload(req); });
}
Model::CompleteMultipartUploadOutcome Client::CompleteMultipartUpload(const CompleteMultipartUploadRequest & request) const
Model::CompleteMultipartUploadOutcome Client::CompleteMultipartUpload(CompleteMultipartUploadRequest & request) const
{
auto outcome = doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>(
request, [this](const Model::CompleteMultipartUploadRequest & req) { return CompleteMultipartUpload(req); });
@ -422,43 +438,43 @@ Model::CompleteMultipartUploadOutcome Client::CompleteMultipartUpload(const Comp
return outcome;
}
Model::CopyObjectOutcome Client::CopyObject(const CopyObjectRequest & request) const
Model::CopyObjectOutcome Client::CopyObject(CopyObjectRequest & request) const
{
return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>(
request, [this](const Model::CopyObjectRequest & req) { return CopyObject(req); });
}
Model::PutObjectOutcome Client::PutObject(const PutObjectRequest & request) const
Model::PutObjectOutcome Client::PutObject(PutObjectRequest & request) const
{
return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>(
request, [this](const Model::PutObjectRequest & req) { return PutObject(req); });
}
Model::UploadPartOutcome Client::UploadPart(const UploadPartRequest & request) const
Model::UploadPartOutcome Client::UploadPart(UploadPartRequest & request) const
{
return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>(
request, [this](const Model::UploadPartRequest & req) { return UploadPart(req); });
}
Model::UploadPartCopyOutcome Client::UploadPartCopy(const UploadPartCopyRequest & request) const
Model::UploadPartCopyOutcome Client::UploadPartCopy(UploadPartCopyRequest & request) const
{
return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>(
request, [this](const Model::UploadPartCopyRequest & req) { return UploadPartCopy(req); });
}
Model::DeleteObjectOutcome Client::DeleteObject(const DeleteObjectRequest & request) const
Model::DeleteObjectOutcome Client::DeleteObject(DeleteObjectRequest & request) const
{
return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>(
request, [this](const Model::DeleteObjectRequest & req) { return DeleteObject(req); });
}
Model::DeleteObjectsOutcome Client::DeleteObjects(const DeleteObjectsRequest & request) const
Model::DeleteObjectsOutcome Client::DeleteObjects(DeleteObjectsRequest & request) const
{
return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>(
request, [this](const Model::DeleteObjectsRequest & req) { return DeleteObjects(req); });
}
Client::ComposeObjectOutcome Client::ComposeObject(const ComposeObjectRequest & request) const
Client::ComposeObjectOutcome Client::ComposeObject(ComposeObjectRequest & request) const
{
auto request_fn = [this](const ComposeObjectRequest & req)
{
@ -490,8 +506,9 @@ Client::ComposeObjectOutcome Client::ComposeObject(const ComposeObjectRequest &
template <typename RequestType, typename RequestFn>
std::invoke_result_t<RequestFn, RequestType>
Client::doRequest(const RequestType & request, RequestFn request_fn) const
Client::doRequest(RequestType & request, RequestFn request_fn) const
{
addAdditionalAMZHeadersToCanonicalHeadersList(request, client_configuration.extra_headers);
const auto & bucket = request.GetBucket();
request.setApiMode(api_mode);
@ -568,8 +585,9 @@ Client::doRequest(const RequestType & request, RequestFn request_fn) const
template <bool IsReadMethod, typename RequestType, typename RequestFn>
std::invoke_result_t<RequestFn, RequestType>
Client::doRequestWithRetryNetworkErrors(const RequestType & request, RequestFn request_fn) const
Client::doRequestWithRetryNetworkErrors(RequestType & request, RequestFn request_fn) const
{
addAdditionalAMZHeadersToCanonicalHeadersList(request, client_configuration.extra_headers);
auto with_retries = [this, request_fn_ = std::move(request_fn)] (const RequestType & request_)
{
chassert(client_configuration.retryStrategy);
@ -666,6 +684,8 @@ std::string Client::getRegionForBucket(const std::string & bucket, bool force_de
Aws::S3::Model::HeadBucketRequest req;
req.SetBucket(bucket);
addAdditionalAMZHeadersToCanonicalHeadersList(req, client_configuration.extra_headers);
std::string region;
auto outcome = HeadBucket(req);
if (outcome.IsSuccess())

View File

@ -177,24 +177,24 @@ public:
template <typename RequestType>
void setKMSHeaders(RequestType & request) const;
Model::HeadObjectOutcome HeadObject(const HeadObjectRequest & request) const;
Model::ListObjectsV2Outcome ListObjectsV2(const ListObjectsV2Request & request) const;
Model::ListObjectsOutcome ListObjects(const ListObjectsRequest & request) const;
Model::GetObjectOutcome GetObject(const GetObjectRequest & request) const;
Model::HeadObjectOutcome HeadObject(HeadObjectRequest & request) const;
Model::ListObjectsV2Outcome ListObjectsV2(ListObjectsV2Request & request) const;
Model::ListObjectsOutcome ListObjects(ListObjectsRequest & request) const;
Model::GetObjectOutcome GetObject(GetObjectRequest & request) const;
Model::AbortMultipartUploadOutcome AbortMultipartUpload(const AbortMultipartUploadRequest & request) const;
Model::CreateMultipartUploadOutcome CreateMultipartUpload(const CreateMultipartUploadRequest & request) const;
Model::CompleteMultipartUploadOutcome CompleteMultipartUpload(const CompleteMultipartUploadRequest & request) const;
Model::UploadPartOutcome UploadPart(const UploadPartRequest & request) const;
Model::UploadPartCopyOutcome UploadPartCopy(const UploadPartCopyRequest & request) const;
Model::AbortMultipartUploadOutcome AbortMultipartUpload(AbortMultipartUploadRequest & request) const;
Model::CreateMultipartUploadOutcome CreateMultipartUpload(CreateMultipartUploadRequest & request) const;
Model::CompleteMultipartUploadOutcome CompleteMultipartUpload(CompleteMultipartUploadRequest & request) const;
Model::UploadPartOutcome UploadPart(UploadPartRequest & request) const;
Model::UploadPartCopyOutcome UploadPartCopy(UploadPartCopyRequest & request) const;
Model::CopyObjectOutcome CopyObject(const CopyObjectRequest & request) const;
Model::PutObjectOutcome PutObject(const PutObjectRequest & request) const;
Model::DeleteObjectOutcome DeleteObject(const DeleteObjectRequest & request) const;
Model::DeleteObjectsOutcome DeleteObjects(const DeleteObjectsRequest & request) const;
Model::CopyObjectOutcome CopyObject(CopyObjectRequest & request) const;
Model::PutObjectOutcome PutObject(PutObjectRequest & request) const;
Model::DeleteObjectOutcome DeleteObject(DeleteObjectRequest & request) const;
Model::DeleteObjectsOutcome DeleteObjects(DeleteObjectsRequest & request) const;
using ComposeObjectOutcome = Aws::Utils::Outcome<Aws::NoResult, Aws::S3::S3Error>;
ComposeObjectOutcome ComposeObject(const ComposeObjectRequest & request) const;
ComposeObjectOutcome ComposeObject(ComposeObjectRequest & request) const;
using Aws::S3::S3Client::EnableRequestProcessing;
using Aws::S3::S3Client::DisableRequestProcessing;
@ -236,11 +236,11 @@ private:
template <typename RequestType, typename RequestFn>
std::invoke_result_t<RequestFn, RequestType>
doRequest(const RequestType & request, RequestFn request_fn) const;
doRequest(RequestType & request, RequestFn request_fn) const;
template <bool IsReadMethod, typename RequestType, typename RequestFn>
std::invoke_result_t<RequestFn, RequestType>
doRequestWithRetryNetworkErrors(const RequestType & request, RequestFn request_fn) const;
doRequestWithRetryNetworkErrors(RequestType & request, RequestFn request_fn) const;
void updateURIForBucket(const std::string & bucket, S3::URI new_uri) const;
std::optional<S3::URI> getURIFromError(const Aws::S3::S3Error & error) const;

View File

@ -649,7 +649,7 @@ Aws::String SSOCredentialsProvider::loadAccessTokenFile(const Aws::String & sso_
}
else
{
LOG_TRACE(logger, "Unable to open token file on path: {}", sso_access_token_path);
LOG_TEST(logger, "Unable to open token file on path: {}", sso_access_token_path);
return "";
}
}

View File

@ -469,7 +469,17 @@ void PocoHTTPClient::makeRequestInternalImpl(
for (const auto & [header_name, header_value] : request.GetHeaders())
poco_request.set(header_name, header_value);
for (const auto & [header_name, header_value] : extra_headers)
poco_request.set(boost::algorithm::to_lower_copy(header_name), header_value);
{
// AWS S3 canonical headers must include `Host`, `Content-Type` and any `x-amz-*` headers.
// These headers will be signed. Custom S3 headers specified in the ClickHouse storage configuration are added in `extra_headers`.
// At this point in the call stack the request has already been signed, and any `x-amz-*` extra headers were already added
// to the canonical headers list. Therefore, we should not add them again to the request.
// https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
if (!header_name.starts_with("x-amz-"))
{
poco_request.set(boost::algorithm::to_lower_copy(header_name), header_value);
}
}
Poco::Net::HTTPResponse poco_response;

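Taken together with addAdditionalAMZHeadersToCanonicalHeadersList in Client.cpp, the intent is: x-amz-* extra headers are attached to the AWS request before signing (so they enter the SigV4 canonical header list), while all other extra headers are only set on the outgoing Poco request. The following standalone sketch of that split is illustrative only; the container type and function name are assumed.
#include <string>
#include <utility>
#include <vector>

using Headers = std::vector<std::pair<std::string, std::string>>;

/// Illustrative sketch: partition extra headers into those that must be signed
/// (x-amz-*) and those that are added to the outgoing HTTP request only.
void splitExtraHeaders(const Headers & extra_headers, Headers & to_sign, Headers & plain)
{
    for (const auto & [name, value] : extra_headers)
    {
        if (name.starts_with("x-amz-"))
            to_sign.emplace_back(name, value);   /// registered on the request before signing
        else
            plain.emplace_back(name, value);     /// set on the Poco request after signing
    }
}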
View File

@ -512,7 +512,7 @@ namespace
client_ptr->setKMSHeaders(request);
}
void processPutRequest(const S3::PutObjectRequest & request)
void processPutRequest(S3::PutObjectRequest & request)
{
size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries, 1UL);
for (size_t retries = 1;; ++retries)
@ -709,7 +709,7 @@ namespace
client_ptr->setKMSHeaders(request);
}
void processCopyRequest(const S3::CopyObjectRequest & request)
void processCopyRequest(S3::CopyObjectRequest & request)
{
size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries, 1UL);
for (size_t retries = 1;; ++retries)

View File

@ -176,7 +176,10 @@ TEST(IOTestAwsS3Client, AppendExtraSSECHeadersRead)
"host;"
"x-amz-api-version;"
"x-amz-content-sha256;"
"x-amz-date, ...\n"
"x-amz-date;"
"x-amz-server-side-encryption-customer-algorithm;"
"x-amz-server-side-encryption-customer-key;"
"x-amz-server-side-encryption-customer-key-md5, ...\n"
"x-amz-server-side-encryption-customer-algorithm: AES256\n"
"x-amz-server-side-encryption-customer-key: Kv/gDqdWVGIT4iDqg+btQvV3lc1idlm4WI+MMOyHOAw=\n"
"x-amz-server-side-encryption-customer-key-md5: fMNuOw6OLU5GG2vc6RTA+g==\n");
@ -197,7 +200,10 @@ TEST(IOTestAwsS3Client, AppendExtraSSECHeadersWrite)
"content-type;"
"host;"
"x-amz-content-sha256;"
"x-amz-date, ...\n"
"x-amz-date;"
"x-amz-server-side-encryption-customer-algorithm;"
"x-amz-server-side-encryption-customer-key;"
"x-amz-server-side-encryption-customer-key-md5, ...\n"
"x-amz-server-side-encryption-customer-algorithm: AES256\n"
"x-amz-server-side-encryption-customer-key: Kv/gDqdWVGIT4iDqg+btQvV3lc1idlm4WI+MMOyHOAw=\n"
"x-amz-server-side-encryption-customer-key-md5: fMNuOw6OLU5GG2vc6RTA+g==\n");

View File

@ -1446,18 +1446,15 @@ void NO_INLINE Aggregator::executeOnIntervalWithoutKey(
for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst)
{
if (inst->offsets)
inst->batch_that->addBatchSinglePlaceFromInterval(
inst->batch_that->addBatchSinglePlace(
inst->offsets[static_cast<ssize_t>(row_begin) - 1],
inst->offsets[row_end - 1],
res + inst->state_offset,
inst->batch_arguments, data_variants.aggregates_pool);
else
inst->batch_that->addBatchSinglePlaceFromInterval(
row_begin,
row_end,
res + inst->state_offset,
inst->batch_arguments,
data_variants.aggregates_pool);
else
inst->batch_that->addBatchSinglePlace(
row_begin, row_end, res + inst->state_offset, inst->batch_arguments, data_variants.aggregates_pool);
}
}

View File

@ -206,10 +206,19 @@ Block ArrayJoinResultIterator::next()
bool is_left = array_join->is_left;
auto cut_any_col = any_array->cut(current_row, next_row - current_row);
const auto * cut_any_array = typeid_cast<const ColumnArray *>(cut_any_col.get());
for (size_t i = 0; i < num_columns; ++i)
{
ColumnWithTypeAndName current = block.safeGetByPosition(i);
current.column = current.column->cut(current_row, next_row - current_row);
/// Reuse cut_any_col if possible to avoid unnecessary cut.
if (!is_unaligned && !is_left && current.name == *columns.begin())
{
current.column = cut_any_col;
current.type = getArrayJoinDataType(current.type);
}
else
current.column = current.column->cut(current_row, next_row - current_row);
if (columns.contains(current.name))
{

View File

@ -2,6 +2,7 @@
#include <Core/UUID.h>
#include <Poco/Net/SocketAddress.h>
#include <Poco/Net/NameValueCollection.h>
#include <base/types.h>
#include <Common/OpenTelemetryTraceContext.h>
#include <Common/VersionNumber.h>
@ -96,6 +97,7 @@ public:
/// For mysql and postgresql
UInt64 connection_id = 0;
Poco::Net::NameValueCollection headers;
/// Comma separated list of forwarded IP addresses (from X-Forwarded-For for HTTP interface).
/// It's expected that proxy appends the forwarded address to the end of the list.

View File

@ -3,6 +3,7 @@
#include <optional>
#include <memory>
#include <Poco/UUID.h>
#include <Poco/Net/NameValueCollection.h>
#include <Poco/Util/Application.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/Macros.h>
@ -322,9 +323,12 @@ struct ContextSharedPart : boost::noncopyable
std::optional<MergeTreeSettings> merge_tree_settings TSA_GUARDED_BY(mutex); /// Settings of MergeTree* engines.
std::optional<MergeTreeSettings> replicated_merge_tree_settings TSA_GUARDED_BY(mutex); /// Settings of ReplicatedMergeTree* engines.
std::atomic_size_t max_table_size_to_drop = 50000000000lu; /// Protects MergeTree tables from accidental DROP (50GB by default)
std::unordered_set<String> get_client_http_header_forbidden_headers;
bool allow_get_client_http_header = false;
std::atomic_size_t max_partition_size_to_drop = 50000000000lu; /// Protects MergeTree partitions from accidental DROP (50GB by default)
/// No lock required for format_schema_path modified only during initialization
String format_schema_path; /// Path to a directory that contains schema files used by input formats.
String google_protos_path; /// Path to a directory that contains the proto files for the well-known Protobuf types.
mutable OnceFlag action_locks_manager_initialized;
ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers
OnceFlag system_logs_initialized;
@ -3950,6 +3954,28 @@ void Context::checkTableCanBeDropped(const String & database, const String & tab
}
void Context::setClientHTTPHeaderForbiddenHeaders(const String & forbidden_headers)
{
std::unordered_set<String> forbidden_header_list;
boost::split(forbidden_header_list, forbidden_headers, [](char c) { return c == ','; });
shared->get_client_http_header_forbidden_headers = forbidden_header_list;
}
void Context::setAllowGetHTTPHeaderFunction(bool allow_get_http_header_function)
{
shared->allow_get_client_http_header = allow_get_http_header_function;
}
const std::unordered_set<String> & Context::getClientHTTPHeaderForbiddenHeaders() const
{
return shared->get_client_http_header_forbidden_headers;
}
bool Context::allowGetHTTPHeaderFunction() const
{
return shared->allow_get_client_http_header;
}
void Context::setMaxPartitionSizeToDrop(size_t max_size)
{
// Is initialized at server startup and updated at config reload
@ -4116,6 +4142,16 @@ void Context::setFormatSchemaPath(const String & path)
shared->format_schema_path = path;
}
String Context::getGoogleProtosPath() const
{
return shared->google_protos_path;
}
void Context::setGoogleProtosPath(const String & path)
{
shared->google_protos_path = path;
}
Context::SampleBlockCache & Context::getSampleBlockCache() const
{
assert(hasQueryContext());
@ -4270,12 +4306,15 @@ void Context::setClientConnectionId(uint32_t connection_id_)
client_info.connection_id = connection_id_;
}
void Context::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer)
void Context::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer, const Poco::Net::NameValueCollection & http_headers)
{
client_info.http_method = http_method;
client_info.http_user_agent = http_user_agent;
client_info.http_referer = http_referer;
need_recalculate_access = true;
if (!http_headers.empty())
client_info.headers = http_headers;
}
void Context::setForwardedFor(const String & forwarded_for)

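setClientHTTPHeaderForbiddenHeaders above splits the configured value on commas with boost::split. An equivalent standalone sketch without Boost is shown below; it is illustrative only and, like the code above, performs no whitespace trimming.
#include <sstream>
#include <string>
#include <unordered_set>

/// Illustrative sketch: "Authorization,Cookie" -> {"Authorization", "Cookie"}.
std::unordered_set<std::string> parseForbiddenHeaders(const std::string & csv)
{
    std::unordered_set<std::string> result;
    std::istringstream stream(csv);
    std::string token;
    while (std::getline(stream, token, ','))
        result.insert(token);
    return result;
}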
View File

@ -26,6 +26,8 @@
#include <Server/HTTP/HTTPContext.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/IStorage_fwd.h>
#include <Poco/Net/NameValueCollection.h>
#include <Core/Types.h>
#include "config.h"
@ -640,7 +642,7 @@ public:
void setClientInterface(ClientInfo::Interface interface);
void setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version);
void setClientConnectionId(uint32_t connection_id);
void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer);
void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer, const Poco::Net::NameValueCollection & http_headers = {});
void setForwardedFor(const String & forwarded_for);
void setQueryKind(ClientInfo::QueryKind query_kind);
void setQueryKindInitial();
@ -1073,6 +1075,11 @@ public:
/// Prevents DROP TABLE if its size is greater than max_size (50GB by default, max_size=0 turn off this check)
void setMaxTableSizeToDrop(size_t max_size);
size_t getMaxTableSizeToDrop() const;
void setClientHTTPHeaderForbiddenHeaders(const String & forbidden_headers);
/// Return the forbidden headers that users can't get via the getClientHTTPHeader function
const std::unordered_set<String> & getClientHTTPHeaderForbiddenHeaders() const;
void setAllowGetHTTPHeaderFunction(bool allow_get_http_header_function);
bool allowGetHTTPHeaderFunction() const;
void checkTableCanBeDropped(const String & database, const String & table, const size_t & table_size) const;
/// Prevents DROP PARTITION if its size is greater than max_size (50GB by default, max_size=0 turn off this check)
@ -1140,6 +1147,10 @@ public:
String getFormatSchemaPath() const;
void setFormatSchemaPath(const String & path);
/// Path to the folder containing the proto files for the well-known Protobuf types
String getGoogleProtosPath() const;
void setGoogleProtosPath(const String & path);
SampleBlockCache & getSampleBlockCache() const;
/// Query parameters for prepared statements.

View File

@ -1050,6 +1050,9 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
if (analysis_result.before_window)
return analysis_result.before_window->getResultColumns();
// NOTE: should not handle before_limit_by specially since
// WithMergeableState does not process LIMIT BY
return analysis_result.before_order_by->getResultColumns();
}
@ -1093,6 +1096,12 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
if (analysis_result.before_window)
return analysis_result.before_window->getResultColumns();
// For a query executed on remote shards up to
// WithMergeableStateAfterAggregation*, the shards can process LIMIT BY
// themselves, since the initiator will not apply LIMIT BY again.
if (analysis_result.before_limit_by)
return analysis_result.before_limit_by->getResultColumns();
return analysis_result.before_order_by->getResultColumns();
}
@ -1539,7 +1548,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
if (query.limitLength())
executeDistinct(query_plan, false, expressions.selected_columns, false);
if (expressions.hasLimitBy())
/// For a query executed on remote shards (up to
/// WithMergeableState*), LIMIT BY should not be applied, since it
/// will be applied on the initiator as well, and the header
/// may not match in some obscure cases.
if (options.to_stage == QueryProcessingStage::FetchColumns && expressions.hasLimitBy())
{
executeExpression(query_plan, expressions.before_limit_by, "Before LIMIT BY");
executeLimitBy(query_plan);

View File

@ -690,9 +690,15 @@ void MutationsInterpreter::prepare(bool dry_run)
{
if (column.default_desc.kind == ColumnDefaultKind::Materialized)
{
auto type_literal = std::make_shared<ASTLiteral>(column.type->getName());
auto materialized_column = makeASTFunction("_CAST",
column.default_desc.expression->clone(),
type_literal);
stages.back().column_to_updated.emplace(
column.name,
column.default_desc.expression->clone());
materialized_column);
}
}
}

View File

@ -183,14 +183,11 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q
}
/// Check other users running query with our query_id
for (const auto & user_process_list : user_to_queries)
if (auto query_user = queries_to_user.find(client_info.current_query_id); query_user != queries_to_user.end() && query_user->second != client_info.current_user)
{
if (user_process_list.first == client_info.current_user)
continue;
if (auto running_query = user_process_list.second.queries.find(client_info.current_query_id); running_query != user_process_list.second.queries.end())
throw Exception(ErrorCodes::QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING,
"Query with id = {} is already running by user {}",
client_info.current_query_id, user_process_list.first);
throw Exception(ErrorCodes::QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING,
"Query with id = {} is already running by user {}",
client_info.current_query_id, query_user->second);
}
auto user_process_list_it = user_to_queries.find(client_info.current_user);
@ -259,6 +256,7 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q
(*process_it)->setUserProcessList(&user_process_list);
user_process_list.queries.emplace(client_info.current_query_id, res->getQueryStatus());
queries_to_user.emplace(client_info.current_query_id, client_info.current_user);
/// Track memory usage for all simultaneously running queries from single user.
user_process_list.user_memory_tracker.setOrRaiseHardLimit(settings.max_memory_usage_for_user);
@ -316,6 +314,9 @@ ProcessListEntry::~ProcessListEntry()
/// Wait for the query if it is in the cancellation right now.
parent.cancelled_cv.wait(lock.lock, [&]() { return process_list_element_ptr->is_cancelling == false; });
if (auto query_user = parent.queries_to_user.find(query_id); query_user != parent.queries_to_user.end())
parent.queries_to_user.erase(query_user);
/// This removes the memory_tracker of one request.
parent.processes.erase(it);

Some files were not shown because too many files have changed in this diff