Merge branch 'master' into iaadeflate_swpath_compat

This commit is contained in:
Robert Schulze 2023-05-12 16:39:17 +02:00 committed by GitHub
commit 9d34233e65
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
282 changed files with 4414 additions and 2370 deletions

View File

@ -111,6 +111,7 @@ Checks: '*,
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-confusable-identifiers, # useful but slooow
-misc-use-anonymous-namespace,
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,

View File

@ -125,8 +125,8 @@ jobs:
SONAR_SCANNER_VERSION: 4.8.0.2856
SONAR_SERVER_URL: "https://sonarcloud.io"
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
CC: clang-15
CXX: clang++-15
CC: clang-16
CXX: clang++-16
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1

8
.gitmodules vendored
View File

@ -267,7 +267,10 @@
url = https://github.com/ClickHouse/nats.c
[submodule "contrib/vectorscan"]
path = contrib/vectorscan
url = https://github.com/VectorCamp/vectorscan
# FIXME: update once upstream fixes will be merged:
# - https://github.com/VectorCamp/vectorscan/pull/148
# - https://github.com/VectorCamp/vectorscan/pull/149
url = https://github.com/azat-ch/vectorscan
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/ClickHouse/c-ares
@ -338,6 +341,9 @@
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing
[submodule "contrib/libfiu"]
path = contrib/libfiu
url = https://github.com/ClickHouse/libfiu.git
[submodule "contrib/isa-l"]
path = contrib/isa-l
url = https://github.com/ClickHouse/isa-l.git

View File

@ -342,13 +342,6 @@ if (COMPILER_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
# Set new experimental pass manager, it's a performance, build time and binary size win.
# Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
endif ()
# We cannot afford to use LTO when compiling unit tests, and it's not enough
# to only supply -fno-lto at the final linking stage. So we disable it
# completely.
@ -395,6 +388,8 @@ if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG"))
set(ENABLE_GWP_ASAN OFF)
endif ()
option (ENABLE_FIU "Enable Fiu" ON)
option(WERROR "Enable -Werror compiler option" ON)
if (WERROR)

View File

@ -21,11 +21,17 @@ curl https://clickhouse.com/ | sh
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
## Upcoming Events
* [**ClickHouse Spring Meetup in Manhattan**](https://www.meetup.com/clickhouse-new-york-user-group/events/292517734) - April 26 - It's spring, and it's time to meet again in the city! Talks include: "Building a domain specific query language on top of Clickhouse", "A Galaxy of Information", "Our Journey to ClickHouse Cloud from Redshift", and a ClickHouse update!
* [**v23.4 Release Webinar**](https://clickhouse.com/company/events/v23-4-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-04) - April 26 - 23.4 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16 - Save the date! ClickHouse is coming back to Berlin. We're excited to announce an upcoming ClickHouse Meetup that you won't want to miss. Join us as we gather together to discuss the latest in the world of ClickHouse and share user stories.
* [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - May 31 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16
* [**ClickHouse Meetup in Barcelona**](https://www.meetup.com/clickhouse-barcelona-user-group/events/292892669) - May 25
* [**ClickHouse Meetup in London**](https://www.meetup.com/clickhouse-london-user-group/events/292892824) - May 25
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/293426725/) - Jun 7
* [**ClickHouse Meetup in Stockholm**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - Jun 13
Also, keep an eye out for upcoming meetups in Amsterdam, Boston, NYC, Beijing, and Toronto. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.
## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
* **Recording available**: [**v23.3 Release Webinar**](https://www.youtube.com/watch?v=ISaGUjvBNao) UNDROP TABLE, server settings introspection, nested dynamic disks, MySQL compatibility, parseDateTime, Lightweight Deletes, Parallel Replicas, integrations updates, and so much more! Watch it now!
* **Recording available**: [**v23.4 Release Webinar**](https://www.youtube.com/watch?v=4rrf6bk_mOg) Faster Parquet Reading, Asynchronous Connections to Replicas, Trailing Comma before FROM, extractKeyValuePairs, integrations updates, and so much more! Watch it now!
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)

View File

@ -10,9 +10,16 @@ set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
if (SANITIZE)
if (SANITIZE STREQUAL "address")
# LLVM-15 has a bug in Address Sanitizer, preventing the usage of 'sanitize-address-use-after-scope',
# see https://github.com/llvm/llvm-project/issues/58633
set (ASAN_FLAGS "-fsanitize=address -fno-sanitize-address-use-after-scope")
set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
if (COMPILER_CLANG)
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 15 AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 16)
# LLVM-15 has a bug in Address Sanitizer, preventing the usage
# of 'sanitize-address-use-after-scope', see [1].
#
# [1]: https://github.com/llvm/llvm-project/issues/58633
set (ASAN_FLAGS "${ASAN_FLAGS} -fno-sanitize-address-use-after-scope")
endif()
endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")

View File

@ -70,12 +70,14 @@ if (LINKER_NAME)
if (NOT LLD_PATH)
message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.")
endif ()
if (COMPILER_CLANG)
# This is a temporary quirk to emit .debug_aranges with ThinLTO, can be removed after upgrade to clang-16
# This is a temporary quirk to emit .debug_aranges with ThinLTO; it only applies to clang/llvm <16
if (COMPILER_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
set (LLD_WRAPPER "${CMAKE_CURRENT_BINARY_DIR}/ld.lld")
configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
else ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}")
endif()
endif ()

View File

@ -105,6 +105,7 @@ add_contrib (libfarmhash)
add_contrib (icu-cmake icu)
add_contrib (h3-cmake h3)
add_contrib (mariadb-connector-c-cmake mariadb-connector-c)
add_contrib (libfiu-cmake libfiu)
if (ENABLE_TESTS)
add_contrib (googletest-cmake googletest)

1
contrib/libfiu vendored Submodule

@ -0,0 +1 @@
Subproject commit b85edbde4cf974b1b40d27828a56f0505f4e2ee5

View File

@ -0,0 +1,20 @@
if (NOT ENABLE_FIU)
message (STATUS "Not using fiu")
return ()
endif ()
set(FIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/")
set(FIU_SOURCES
${FIU_DIR}/libfiu/fiu.c
${FIU_DIR}/libfiu/fiu-rc.c
${FIU_DIR}/libfiu/backtrace.c
${FIU_DIR}/libfiu/wtable.c
)
set(FIU_HEADERS "${FIU_DIR}/libfiu")
add_library(_fiu ${FIU_SOURCES})
target_compile_definitions(_fiu PUBLIC DUMMY_BACKTRACE)
target_include_directories(_fiu PUBLIC ${FIU_HEADERS})
add_library(ch_contrib::fiu ALIAS _fiu)

2
contrib/vectorscan vendored

@ -1 +1 @@
Subproject commit b4bba94b1a250603b0b198e0394946e32f6c3f30
Subproject commit aaca65aa210ce3ec91bd2b249c4d59e55e80a869

View File

@ -362,17 +362,16 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--compiler",
choices=(
"clang-15",
"clang-15-darwin",
"clang-15-darwin-aarch64",
"clang-15-aarch64",
"clang-15-aarch64-v80compat",
"clang-15-ppc64le",
"clang-15-amd64-compat",
"clang-15-freebsd",
"gcc-11",
"clang-16",
"clang-16-darwin",
"clang-16-darwin-aarch64",
"clang-16-aarch64",
"clang-16-aarch64-v80compat",
"clang-16-ppc64le",
"clang-16-amd64-compat",
"clang-16-freebsd",
),
default="clang-15",
default="clang-16",
help="a compiler to use",
)
parser.add_argument(

View File

@ -10,35 +10,20 @@ RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && apt-get --yes --allow-unauthenticated install libclang-${LLVM_VERSION}-dev libmlir-${LLVM_VERSION}-dev
# libclang-15-dev does not contain proper symlink:
#
# This is what cmake will search for:
#
# # readlink -f /usr/lib/llvm-15/lib/libclang-15.so.1
# /usr/lib/x86_64-linux-gnu/libclang-15.so.1
#
# This is what exists:
#
# # ls -l /usr/lib/x86_64-linux-gnu/libclang-15*
# lrwxrwxrwx 1 root root 16 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so -> libclang-15.so.1
# lrwxrwxrwx 1 root root 21 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15 -> libclang-15.so.15.0.0
# -rw-r--r-- 1 root root 31835760 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15.0.0
#
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
*) exit 1 ;; \
esac \
&& ln -rsf /usr/lib/$rarch-linux-gnu/libclang-15.so.15 /usr/lib/$rarch-linux-gnu/libclang-15.so.1
esac
# repo versions don't work correctly with C++17
# also we push reports to s3, so we add index.html to subfolder urls
# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
RUN git clone --depth=1 https://github.com/ClickHouse/woboq_codebrowser /woboq_codebrowser \
&& cd /woboq_codebrowser \
&& cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} \
&& cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} -DCLANG_BUILTIN_HEADERS_DIR=/usr/lib/llvm-${LLVM_VERSION}/lib/clang/${LLVM_VERSION}/include \
&& ninja
ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator

View File

@ -9,7 +9,7 @@ trap 'kill $(jobs -pr) ||:' EXIT
stage=${stage:-}
# Compiler version, normally set by Dockerfile
export LLVM_VERSION=${LLVM_VERSION:-13}
export LLVM_VERSION=${LLVM_VERSION:-16}
# A variable to pass additional flags to CMake.
# Here we explicitly default it to nothing so that bash doesn't complain about
@ -147,6 +147,7 @@ function clone_submodules
contrib/xxHash
contrib/simdjson
contrib/liburing
contrib/libfiu
)
git submodule sync

View File

@ -15,7 +15,7 @@ stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"
repo_dir=ch
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-15_debug_none_unsplitted_disable_False_binary"}
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-16_debug_none_unsplitted_disable_False_binary"}
BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
function git_clone_with_retry

View File

@ -2,7 +2,7 @@
set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}

View File

@ -2,7 +2,7 @@
set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}

View File

@ -20,9 +20,6 @@ install_packages package_folder
# Thread Fuzzer allows checking more permutations of possible thread scheduling
# and finding more potential issues.
# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540
is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'")
if [ "$is_tsan_build" -eq "0" ]; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
@ -44,7 +41,6 @@ if [ "$is_tsan_build" -eq "0" ]; then
export THREAD_FUZZER_EXPLICIT_SLEEP_PROBABILITY=0.01
export THREAD_FUZZER_EXPLICIT_MEMORY_EXCEPTION_PROBABILITY=0.01
fi
export ZOOKEEPER_FAULT_INJECTION=1
# Initial run without S3 to create system.*_log on local file system to make it

View File

@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
# 15.0.2
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=16
RUN apt-get update \
&& apt-get install \
@ -52,6 +52,7 @@ RUN apt-get update \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \
libclang-${LLVM_VERSION}-dev \
moreutils \
nasm \
ninja-build \

View File

@ -11,14 +11,14 @@ This is intended for continuous integration checks that run on Linux servers. If
The cross-build for macOS is based on the [Build instructions](../development/build.md), follow them first.
## Install Clang-15
## Install Clang-16
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example, the commands for Bionic are:
``` bash
sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-15 main" >> /etc/apt/sources.list
sudo apt-get install clang-15
sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-16 main" >> /etc/apt/sources.list
sudo apt-get install clang-16
```
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
@ -55,7 +55,7 @@ curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX1
cd ClickHouse
mkdir build-darwin
cd build-darwin
CC=clang-15 CXX=clang++-15 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake ..
CC=clang-16 CXX=clang++-16 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake ..
ninja
```

View File

@ -11,7 +11,7 @@ This is for the case when you have Linux machine and want to use it to build `cl
The cross-build for RISC-V 64 is based on the [Build instructions](../development/build.md), follow them first.
## Install Clang-13
## Install Clang-16
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do
```
@ -23,7 +23,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
``` bash
cd ClickHouse
mkdir build-riscv64
CC=clang-14 CXX=clang++-14 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF
CC=clang-16 CXX=clang++-16 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF
ninja -C build-riscv64
```

View File

@ -47,8 +47,8 @@ GCC as a compiler is not supported
To build with a specific Clang version:
``` bash
export CC=clang-15
export CXX=clang++-15
export CC=clang-16
export CXX=clang++-16
```
### Checkout ClickHouse Sources {#checkout-clickhouse-sources}

View File

@ -102,7 +102,7 @@ Builds ClickHouse in various configurations for use in further steps. You have t
### Report Details
- **Compiler**: `clang-15`, optionally with the name of a target platform
- **Compiler**: `clang-16`, optionally with the name of a target platform
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Status**: `success` or `fail`

View File

@ -152,7 +152,7 @@ While inside the `build` directory, configure your build by running CMake. Befor
export CC=clang CXX=clang++
cmake ..
If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-15 CXX=clang++-15`. The clang version will be in the script output.
If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-16 CXX=clang++-16`. The clang version will be in the script output.
The `CC` variable specifies the compiler for C (short for C Compiler), and the `CXX` variable specifies which C++ compiler is to be used for building.

View File

@ -38,6 +38,10 @@ Structure of the `users` section:
</table_name>
</database_name>
</databases>
<grants>
<query>GRANT SELECT ON system.*</query>
</grants>
</user_name>
<!-- Other users settings -->
</users>
@ -86,6 +90,28 @@ Possible values:
Default value: 0.
### grants {#grants-user-setting}
This setting allows granting any rights to the selected user.
Each element of the list should be a `GRANT` query without any grantees specified.
Example:
```xml
<user1>
<grants>
<query>GRANT SHOW ON *.*</query>
<query>GRANT CREATE ON *.* WITH GRANT OPTION</query>
<query>GRANT SELECT ON system.*</query>
</grants>
</user1>
```
This setting can't be specified together with the
`dictionaries`, `access_management`, `named_collection_control`, `show_named_collections_secrets`,
and `allow_databases` settings.
### user_name/networks {#user-namenetworks}
List of networks from which the user can connect to the ClickHouse server.

View File

@ -1125,6 +1125,12 @@ If unsuccessful, several attempts are made to connect to various replicas.
Default value: 1000.
## connect_timeout_with_failover_secure_ms
Connection timeout for selecting the first healthy replica (for secure connections).
Default value: 1000.
## connection_pool_max_wait_ms {#connection-pool-max-wait-ms}
The wait time in milliseconds for a connection when the connection pool is full.
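A minimal sketch of adjusting it per session (the value is illustrative):
```sql
SET connection_pool_max_wait_ms = 5000;
```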
@ -1630,7 +1636,7 @@ For not replicated tables see [non_replicated_deduplication_window](merge-tree-s
### async_insert {#async-insert}
Enables or disables asynchronous inserts. This makes sense only for insertion over the HTTP protocol. Note that deduplication doesn't work for such inserts.
Enables or disables asynchronous inserts. Note that deduplication is disabled by default, see [async_insert_deduplicate](#async-insert-deduplicate).
If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables.
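A minimal sketch of enabling it for a session (the table name `t` and the inserted value are hypothetical):
```sql
SET async_insert = 1;
SET wait_for_async_insert = 1; -- make the INSERT return only after the batch is flushed
INSERT INTO t VALUES (42);
```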
@ -3562,7 +3568,7 @@ Default value: `1`.
If the setting is set to `0`, the table function does not make Nullable columns and inserts default values instead of NULL. This is also applicable for NULL values inside arrays.
## allow_experimental_projection_optimization {#allow-experimental-projection-optimization}
## optimize_use_projections {#optimize_use_projections}
Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md/#projections) optimization when processing `SELECT` queries.
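For example:
```sql
SET optimize_use_projections = 1;
```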
@ -3575,7 +3581,7 @@ Default value: `1`.
## force_optimize_projection {#force-optimize-projection}
Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md/#projections) in `SELECT` queries, when projection optimization is enabled (see [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting).
Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md/#projections) in `SELECT` queries, when projection optimization is enabled (see [optimize_use_projections](#optimize_use_projections) setting).
Possible values:

View File

@ -215,7 +215,7 @@ Cache **system tables**:
Cache **commands**:
- `SYSTEM DROP FILESYSTEM CACHE (<path>) (ON CLUSTER)`
- `SYSTEM DROP FILESYSTEM CACHE (<cache_name>) (ON CLUSTER)` -- `ON CLUSTER` is only supported when no `<cache_name>` is provided
- `SHOW FILESYSTEM CACHES` -- show the list of filesystem caches configured on the server. (For versions <= `22.8` the command is named `SHOW CACHES`)
@ -231,10 +231,10 @@ Result:
└───────────┘
```
- `DESCRIBE CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW CACHES` command. (For versions <= `22.8` the command is named `DESCRIBE CACHE`)
- `DESCRIBE FILESYSTEM CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW FILESYSTEM CACHES` command. (For versions <= `22.8` the command is named `DESCRIBE CACHE`)
```sql
DESCRIBE CACHE 's3_cache'
DESCRIBE FILESYSTEM CACHE 's3_cache'
```
``` text

View File

@ -0,0 +1,55 @@
---
slug: /en/sql-reference/aggregate-functions/reference/first_value
sidebar_position: 7
---
# first_value
Selects the first encountered value, similar to `any`, but it can accept NULL.
## Examples
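The examples below use a small table `test_data`; its schema is not given on this page, so the following is one possible (hypothetical) definition:
```sql
CREATE TABLE test_data (a Int64, b Nullable(Int64)) ENGINE = Memory;
```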
```sql
insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null)
```
### Example 1
The NULL value is ignored by default.
```sql
select first_value(b) from test_data
```
```text
┌─first_value_ignore_nulls(b)─┐
│ 3 │
└─────────────────────────────┘
```
### Example 2
The NULL value is ignored.
```sql
select first_value(b) ignore nulls from test_data
```
```text
┌─first_value_ignore_nulls(b)─┐
│ 3 │
└─────────────────────────────┘
```
### Example 3
The NULL value is accepted.
```sql
select first_value(b) respect nulls from test_data
```
```text
┌─first_value_respect_nulls(b)─┐
│ ᴺᵁᴸᴸ │
└──────────────────────────────┘
```

View File

@ -0,0 +1,48 @@
---
slug: /en/sql-reference/aggregate-functions/reference/greatest
title: greatest
---
Aggregate function that returns the greatest across a list of values. All of the list members must be of comparable types.
Examples:
```sql
SELECT
toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.)),
greatest(1, 2, toUInt8(3), 3.)
```
```response
┌─toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.))─┬─greatest(1, 2, toUInt8(3), 3.)─┐
│ Float64 │ 3 │
└─────────────────────────────────────────────────────┴────────────────────────────────┘
```
:::note
The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison.
:::
```sql
SELECT greatest(['hello'], ['there'], ['world'])
```
```response
┌─greatest(['hello'], ['there'], ['world'])─┐
│ ['world'] │
└───────────────────────────────────────────┘
```
```sql
SELECT greatest(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3))
```
```response
┌─greatest(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐
│ 2023-05-12 01:16:59.000 │
└───────────────────────────────────────────────────────────────────────────────┘
```
:::note
The type returned is a DateTime64 as the DateTime32 must be promoted to 64 bit for the comparison.
:::
Also see [least](/docs/en/sql-reference/aggregate-functions/reference/least.md).

View File

@ -26,6 +26,8 @@ ClickHouse-specific aggregate functions:
- [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md)
- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md)
- [first_value](../../../sql-reference/aggregate-functions/reference/first_value.md)
- [last_value](../../../sql-reference/aggregate-functions/reference/last_value.md)
- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md)
- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md)
- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md)

View File

@ -0,0 +1,53 @@
---
slug: /en/sql-reference/aggregate-functions/reference/last_value
sidebar_position: 8
---
# last_value
Selects the last encountered value, similar to `anyLast`, but it can accept NULL.
## Examples
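The examples below reuse the `test_data` table; see the hypothetical schema sketched on the first_value page (`a Int64, b Nullable(Int64)`).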
```sql
insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null)
```
### Example 1
The NULL value is ignored by default.
```sql
select last_value(b) from test_data
```
```text
┌─last_value_ignore_nulls(b)─┐
│ 5 │
└────────────────────────────┘
```
### Example 2
The NULL value is ignored.
```sql
select last_value(b) ignore nulls from test_data
```
```text
┌─last_value_ignore_nulls(b)─┐
│ 5 │
└────────────────────────────┘
```
### Example 3
The NULL value is accepted.
```sql
select last_value(b) respect nulls from test_data
```
```text
┌─last_value_respect_nulls(b)─┐
│ ᴺᵁᴸᴸ │
└─────────────────────────────┘
```

View File

@ -0,0 +1,48 @@
---
slug: /en/sql-reference/aggregate-functions/reference/least
title: least
---
Aggregate function that returns the least across a list of values. All of the list members must be of comparable types.
Examples:
```sql
SELECT
toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.)),
least(1, 2, toUInt8(3), 3.)
```
```response
┌─toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.))─┬─least(1, 2, toUInt8(3), 3.)─┐
│ Float64 │ 1 │
└──────────────────────────────────────────────────┴─────────────────────────────┘
```
:::note
The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison.
:::
```sql
SELECT least(['hello'], ['there'], ['world'])
```
```response
┌─least(['hello'], ['there'], ['world'])─┐
│ ['hello'] │
└────────────────────────────────────────┘
```
```sql
SELECT least(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3))
```
```response
┌─least(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐
│ 2023-05-12 01:16:59.000 │
└────────────────────────────────────────────────────────────────────────────┘
```
:::note
The type returned is a DateTime64 as the DateTime32 must be promoted to 64 bit for the comparison.
:::
Also see [greatest](/docs/en/sql-reference/aggregate-functions/reference/greatest.md).

View File

@ -2218,8 +2218,6 @@ LAYOUT(regexp_tree)
...
```
We only allow `YAMLRegExpTree` to work with the regexp_tree dictionary layout. If you want to use other sources, please set `regexp_dict_allow_other_sources` to true.
**Source**
We introduce a type of source called `YAMLRegExpTree` representing the structure of Regexp Tree dictionary. An Example of a valid yaml config is like:

View File

@ -59,244 +59,6 @@ A lambda function that accepts multiple arguments can also be passed to a higher
For some functions the first argument (the lambda function) can be omitted. In this case, identical mapping is assumed.
## SQL User Defined Functions
## User Defined Functions (UDFs)
Custom functions from lambda expressions can be created using the [CREATE FUNCTION](../statements/create/function.md) statement. To delete these functions use the [DROP FUNCTION](../statements/drop.md#drop-function) statement.
## Executable User Defined Functions
ClickHouse can call any external executable program or script to process data.
The configuration of executable user defined functions can be located in one or more xml-files. The path to the configuration is specified in the [user_defined_executable_functions_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_defined_executable_functions_config) parameter.
A function configuration contains the following settings:
- `name` - a function name.
- `command` - script name to execute or command if `execute_direct` is false.
- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of a returned value.
- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `pool_size` - the size of a command pool. Optional. Default value is `16`.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter.
The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is, after processing a chunk of arguments it must wait for the next chunk.
**Example**
Creating `test_function` using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
<name>value</name>
</argument>
<format>TabSeparated</format>
<command>test_function.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function.py` (`/var/lib/clickhouse/user_scripts/test_function.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == '__main__':
for line in sys.stdin:
print("Value " + line, end='')
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_python(toUInt64(2));
```
Result:
``` text
┌─test_function_python(2)─┐
│ Value 2 │
└─────────────────────────┘
```
Creating `test_function_sum`, manually specifying `execute_direct` as `0`, using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum</name>
<return_type>UInt64</return_type>
<argument>
<type>UInt64</type>
<name>lhs</name>
</argument>
<argument>
<type>UInt64</type>
<name>rhs</name>
</argument>
<format>TabSeparated</format>
<command>cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table"</command>
<execute_direct>0</execute_direct>
</function>
</functions>
```
Query:
``` sql
SELECT test_function_sum(2, 2);
```
Result:
``` text
┌─test_function_sum(2, 2)─┐
│ 4 │
└─────────────────────────┘
```
Creating `test_function_sum_json` with named arguments and format [JSONEachRow](../../interfaces/formats.md#jsoneachrow) using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum_json</name>
<return_type>UInt64</return_type>
<return_name>result_name</return_name>
<argument>
<type>UInt64</type>
<name>argument_1</name>
</argument>
<argument>
<type>UInt64</type>
<name>argument_2</name>
</argument>
<format>JSONEachRow</format>
<command>test_function_sum_json.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_sum_json.py` (`/var/lib/clickhouse/user_scripts/test_function_sum_json.py` with default path settings).
```python
#!/usr/bin/python3
import sys
import json
if __name__ == '__main__':
for line in sys.stdin:
value = json.loads(line)
first_arg = int(value['argument_1'])
second_arg = int(value['argument_2'])
result = {'result_name': first_arg + second_arg}
print(json.dumps(result), end='\n')
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_sum_json(2, 2);
```
Result:
``` text
┌─test_function_sum_json(2, 2)─┐
│ 4 │
└──────────────────────────────┘
```
Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type).
File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_parameter_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
</argument>
<format>TabSeparated</format>
<command>test_function_parameter_python.py {test_parameter:UInt64}</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_parameter_python.py` (`/var/lib/clickhouse/user_scripts/test_function_parameter_python.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == "__main__":
for line in sys.stdin:
print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="")
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_parameter_python(1)(2);
```
Result:
``` text
┌─test_function_parameter_python(1)(2)─┐
│ Parameter 1 value 2 │
└──────────────────────────────────────┘
```
## Error Handling
Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query.
## Evaluation of Argument Expressions
In almost all programming languages, one of the arguments might not be evaluated for certain operators. This is usually the operators `&&`, `||`, and `?:`.
But in ClickHouse, arguments of functions (operators) are always evaluated. This is because entire parts of columns are evaluated at once, instead of calculating each row separately.
## Performing Functions for Distributed Query Processing
For distributed query processing, as many stages of query processing as possible are performed on remote servers, and the rest of the stages (merging intermediate results and everything after that) are performed on the requestor server.
This means that functions can be performed on different servers.
For example, in the query `SELECT f(sum(g(x))) FROM distributed_table GROUP BY h(y)`,
- if a `distributed_table` has at least two shards, the functions g and h are performed on remote servers, and the function f is performed on the requestor server.
- if a `distributed_table` has only one shard, all the f, g, and h functions are performed on this shard's server.
The result of a function usually does not depend on which server it is performed on. However, sometimes this is important.
For example, functions that work with dictionaries use the dictionary that exists on the server they are running on.
Another example is the `hostName` function, which returns the name of the server it is running on in order to make `GROUP BY` by servers in a `SELECT` query.
If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in the `any` aggregate function or add it to a key in `GROUP BY`.
## Related Content
- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)
ClickHouse supports user-defined functions. See [UDFs](/docs/en/sql-reference/functions/udf.md).

View File

@ -0,0 +1,249 @@
---
slug: /en/sql-reference/functions/udf
sidebar_position: 15
sidebar_label: UDF
---
# User Defined Functions (UDFs)
## Executable User Defined Functions
ClickHouse can call any external executable program or script to process data.
The configuration of executable user defined functions can be located in one or more xml-files. The path to the configuration is specified in the [user_defined_executable_functions_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_defined_executable_functions_config) parameter.
A function configuration contains the following settings:
- `name` - a function name.
- `command` - script name to execute or command if `execute_direct` is false.
- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of a returned value.
- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `pool_size` - the size of a command pool. Optional. Default value is `16`.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter.
The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is, after processing a chunk of arguments it must wait for the next chunk.
**Example**
Creating `test_function` using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
<name>value</name>
</argument>
<format>TabSeparated</format>
<command>test_function.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function.py` (`/var/lib/clickhouse/user_scripts/test_function.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == '__main__':
for line in sys.stdin:
print("Value " + line, end='')
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_python(toUInt64(2));
```
Result:
``` text
┌─test_function_python(2)─┐
│ Value 2 │
└─────────────────────────┘
```
Creating `test_function_sum`, manually specifying `execute_direct` as `0`, using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum</name>
<return_type>UInt64</return_type>
<argument>
<type>UInt64</type>
<name>lhs</name>
</argument>
<argument>
<type>UInt64</type>
<name>rhs</name>
</argument>
<format>TabSeparated</format>
<command>cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table"</command>
<execute_direct>0</execute_direct>
</function>
</functions>
```
Query:
``` sql
SELECT test_function_sum(2, 2);
```
Result:
``` text
┌─test_function_sum(2, 2)─┐
│ 4 │
└─────────────────────────┘
```
Creating `test_function_sum_json` with named arguments and format [JSONEachRow](../../interfaces/formats.md#jsoneachrow) using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum_json</name>
<return_type>UInt64</return_type>
<return_name>result_name</return_name>
<argument>
<type>UInt64</type>
<name>argument_1</name>
</argument>
<argument>
<type>UInt64</type>
<name>argument_2</name>
</argument>
<format>JSONEachRow</format>
<command>test_function_sum_json.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_sum_json.py` (`/var/lib/clickhouse/user_scripts/test_function_sum_json.py` with default path settings).
```python
#!/usr/bin/python3
import sys
import json
if __name__ == '__main__':
for line in sys.stdin:
value = json.loads(line)
first_arg = int(value['argument_1'])
second_arg = int(value['argument_2'])
result = {'result_name': first_arg + second_arg}
print(json.dumps(result), end='\n')
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_sum_json(2, 2);
```
Result:
``` text
┌─test_function_sum_json(2, 2)─┐
│ 4 │
└──────────────────────────────┘
```
Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type).
File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_parameter_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
</argument>
<format>TabSeparated</format>
<command>test_function_parameter_python.py {test_parameter:UInt64}</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_parameter_python.py` (`/var/lib/clickhouse/user_scripts/test_function_parameter_python.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == "__main__":
for line in sys.stdin:
print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="")
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_parameter_python(1)(2);
```
Result:
``` text
┌─test_function_parameter_python(1)(2)─┐
│ Parameter 1 value 2 │
└──────────────────────────────────────┘
```
## Error Handling
Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query.
## Evaluation of Argument Expressions
In almost all programming languages, one of the arguments might not be evaluated for certain operators. This is usually the operators `&&`, `||`, and `?:`.
But in ClickHouse, arguments of functions (operators) are always evaluated. This is because entire parts of columns are evaluated at once, instead of calculating each row separately.
## Performing Functions for Distributed Query Processing
For distributed query processing, as many stages of query processing as possible are performed on remote servers, and the rest of the stages (merging intermediate results and everything after that) are performed on the requestor server.
This means that functions can be performed on different servers.
For example, in the query `SELECT f(sum(g(x))) FROM distributed_table GROUP BY h(y)`,
- if a `distributed_table` has at least two shards, the functions g and h are performed on remote servers, and the function f is performed on the requestor server.
- if a `distributed_table` has only one shard, all the f, g, and h functions are performed on this shard's server.
The result of a function usually does not depend on which server it is performed on. However, sometimes this is important.
For example, functions that work with dictionaries use the dictionary that exists on the server they are running on.
Another example is the `hostName` function, which returns the name of the server it is running on in order to make `GROUP BY` by servers in a `SELECT` query.
If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in the `any` aggregate function or add it to a key in `GROUP BY`.
## SQL User Defined Functions
Custom functions from lambda expressions can be created using the [CREATE FUNCTION](../statements/create/function.md) statement. To delete these functions use the [DROP FUNCTION](../statements/drop.md#drop-function) statement.
## Related Content
### [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)

View File

@ -103,7 +103,11 @@ ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM tab
```
This query copies the data partition from `table1` to `table2`.
Note that data will be deleted neither from `table1` nor from `table2`.
Note that:
- Data will be deleted neither from `table1` nor from `table2`.
- `table1` may be a temporary table.
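A minimal sketch with a concrete (hypothetical) partition expression:
```sql
ALTER TABLE table2 ATTACH PARTITION 202305 FROM table1;
```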
For the query to run successfully, the following conditions must be met:
@ -117,7 +121,12 @@ For the query to run successfully, the following conditions must be met:
ALTER TABLE table2 [ON CLUSTER cluster] REPLACE PARTITION partition_expr FROM table1
```
This query copies the data partition from `table1` to `table2` and replaces the existing partition in `table2`. Note that data won't be deleted from `table1`.
This query copies the data partition from `table1` to `table2` and replaces the existing partition in `table2`.
Note that:
- Data won't be deleted from `table1`.
- `table1` may be a temporary table.
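Analogously, a hypothetical invocation:
```sql
ALTER TABLE table2 REPLACE PARTITION 202305 FROM table1;
```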
For the query to run successfully, the following conditions must be met:

View File

@ -61,4 +61,6 @@ Result:
## Related Content
- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)
### [Executable UDFs](/docs/en/sql-reference/functions/udf.md)
### [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)

View File

@ -12,7 +12,7 @@ Compressed files are supported. Compression type is detected by the extension of
**Syntax**
```sql
SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [COMPRESSION type [LEVEL level]]
SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [APPEND] [COMPRESSION type [LEVEL level]]
```
`file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
@ -25,6 +25,7 @@ SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [COMPRESSION type [LEVEL
- The query will fail if a file with the same file name already exists.
- The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it.
- If `AND STDOUT` is mentioned in the query then the output that is written to the file is also displayed on standard output. If used with compression, the plaintext is displayed on standard output.
- If `APPEND` is mentioned in the query then the output is appended to an existing file. `APPEND` cannot be combined with compression.
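A sketch of the new clause (the file name is hypothetical):
```sql
SELECT 1 INTO OUTFILE 'result.tsv' APPEND;
```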
**Example**

View File

@ -1,7 +1,7 @@
---
slug: /en/sql-reference/table-functions/dictionary
sidebar_position: 54
sidebar_label: dictionary function
sidebar_label: dictionary
title: dictionary
---

View File

@ -391,7 +391,7 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT
## Projections {#projections}
Projections are similar to [materialized views](../../../sql-reference/statements/create/view.md#materialized), but they are defined at the level of data parts. This provides consistency guarantees along with automatic use in queries.
Projections are an experimental feature. To enable projection support, set the [allow_experimental_projection_optimization](../../../operations/settings/settings.md#allow-experimental-projection-optimization) setting to `1`. See also the [force_optimize_projection](../../../operations/settings/settings.md#force-optimize-projection) setting.
Projections are an experimental feature. To enable projection support, set the [optimize_use_projections](../../../operations/settings/settings.md#allow-experimental-projection-optimization) setting to `1`. See also the [force_optimize_projection](../../../operations/settings/settings.md#optimize_use_projections) setting.
Projections are not supported for `SELECT` queries with the [FINAL](../../../sql-reference/statements/select/from.md#select-from-final) modifier.

View File

@ -37,6 +37,10 @@ sidebar_label: "User settings"
<table_name>
</database_name>
</databases>
<grants>
<query>GRANT SELECT ON system.*</query>
</grants>
</user_name>
<!-- Other users settings -->
</users>
@ -89,6 +93,27 @@ sidebar_label: "User settings"
Default value: 0.
### grants {#grants-user-setting}
This setting allows specifying a set of rights for the given user.
Each element of the list should be a `GRANT` query without any grantees specified in the query itself.
Example:
```xml
<user1>
<grants>
<query>GRANT SHOW ON *.*</query>
<query>GRANT CREATE ON *.* WITH GRANT OPTION</query>
<query>GRANT SELECT ON system.*</query>
</grants>
</user1>
```
This setting can't be specified together with the
`dictionaries`, `access_management`, `named_collection_control`, `show_named_collections_secrets`,
or `allow_databases` settings.
### user_name/networks {#user-namenetworks}
A list of networks from which the user can connect to the ClickHouse server.

View File

@ -3588,7 +3588,7 @@ SETTINGS index_granularity = 8192 │
A string with the identifier of the snapshot from which the [initial dump of PostgreSQL tables](../../engines/database-engines/materialized-postgresql.md) will be performed. This setting must be used together with [materialized_postgresql_replication_slot](#materialized-postgresql-replication-slot).
## allow_experimental_projection_optimization {#allow-experimental-projection-optimization}
## optimize_use_projections {#optimize_use_projections}
Enables or disables support for [projections](../../engines/table-engines/mergetree-family/mergetree.md#projections) when processing `SELECT` queries.
@ -3601,7 +3601,7 @@ SETTINGS index_granularity = 8192 │
## force_optimize_projection {#force-optimize-projection}
Enables or disables the mandatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md#projections) in `SELECT` queries when projection support is enabled (see the [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting).
Enables or disables the mandatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md#projections) in `SELECT` queries when projection support is enabled (see the [optimize_use_projections](#optimize_use_projections) setting).
Possible values:

View File

@ -102,7 +102,11 @@ ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM tab
```
Copies the data partition from table `table1` to table `table2`.
Note that data is deleted neither from `table1` nor from `table2`.
Note that:
- Data is deleted neither from `table1` nor from `table2`.
- `table1` may be a temporary table.
Keep in mind:
@ -118,7 +122,12 @@ ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM tab
ALTER TABLE table2 [ON CLUSTER cluster] REPLACE PARTITION partition_expr FROM table1
```
Copies the partition from table `table1` to table `table2`, replacing the existing data in `table2`. Data from `table1` is not deleted.
Copies the partition from table `table1` to table `table2`, replacing the existing data in `table2`.
Note that:
- Data from `table1` is not deleted.
- `table1` may be a temporary table.
Keep in mind:

View File

@ -1074,7 +1074,7 @@ The corresponding trace log in the ClickHouse server log file confirms that ClickHouse is
<a href="https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/#projections" target="_blank">Projections</a> are currently an experimental feature, so we need to tell ClickHouse:
```sql
SET allow_experimental_projection_optimization = 1;
SET optimize_use_projections = 1;
```

View File

@ -1395,8 +1395,8 @@ try
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config(), socket, listen_host, port);
socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC));
socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC));
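// Wrapping the value in Poco::Timespan(seconds, 0) matters: a bare integer would
// invoke Timespan's single-argument constructor, which interprets it as microseconds.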
socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0));
socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0));
return ProtocolServerAdapter(
listen_host,
port_name,
@ -1418,8 +1418,8 @@ try
#if USE_SSL
Poco::Net::SecureServerSocket socket;
auto address = socketBindListen(config(), socket, listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC));
socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC));
socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0));
socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0));
return ProtocolServerAdapter(
listen_host,
secure_port_name,

View File

@ -185,6 +185,7 @@ enum class AccessType
M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \
M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \
M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \
M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT", GLOBAL, SYSTEM) \
M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \
\
M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\
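The new access type is granted like any other SYSTEM privilege; a sketch with a hypothetical user:

```sql
-- Covers both SYSTEM ENABLE FAILPOINT and SYSTEM DISABLE FAILPOINT
GRANT SYSTEM FAILPOINT ON *.* TO test_user;
```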

View File

@ -11,6 +11,10 @@
#include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h>
#include <Core/Settings.h>
#include <Interpreters/executeQuery.h>
#include <Parsers/Access/ASTGrantQuery.h>
#include <Parsers/Access/ParserGrantQuery.h>
#include <Parsers/parseQuery.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Poco/MD5Engine.h>
#include <Poco/JSON/JSON.h>
@ -49,7 +53,12 @@ namespace
UUID generateID(const IAccessEntity & entity) { return generateID(entity.getType(), entity.getName()); }
UserPtr parseUser(const Poco::Util::AbstractConfiguration & config, const String & user_name, const std::unordered_set<UUID> & allowed_profile_ids, bool allow_no_password, bool allow_plaintext_password)
UserPtr parseUser(
const Poco::Util::AbstractConfiguration & config,
const String & user_name,
const std::unordered_set<UUID> & allowed_profile_ids,
bool allow_no_password,
bool allow_plaintext_password)
{
auto user = std::make_shared<User>();
user->setName(user_name);
@ -207,6 +216,65 @@ namespace
}
}
const auto grants_config = user_config + ".grants";
std::optional<Strings> grant_queries;
if (config.has(grants_config))
{
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(grants_config, keys);
grant_queries.emplace();
grant_queries->reserve(keys.size());
for (const auto & key : keys)
{
const auto query = config.getString(grants_config + "." + key);
grant_queries->push_back(query);
}
}
bool access_management = config.getBool(user_config + ".access_management", false);
bool named_collection_control = config.getBool(user_config + ".named_collection_control", false);
bool show_named_collections_secrets = config.getBool(user_config + ".show_named_collections_secrets", false);
if (grant_queries)
if (databases || dictionaries || access_management || named_collection_control || show_named_collections_secrets)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Any other access control settings can't be specified with `grants`");
if (grant_queries)
{
ParserGrantQuery parser;
parser.parseWithoutGrantees();
for (const auto & string_query : *grant_queries)
{
String error_message;
const char * pos = string_query.data();
auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, 0);
if (!ast)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse grant query. Error: {}", error_message);
auto & query = ast->as<ASTGrantQuery &>();
if (query.roles)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Roles can't be granted in config file");
if (!query.cluster.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Can't grant on cluster using config file");
if (query.grantees)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "You can't specify grantees in query using config file");
for (auto & element : query.access_rights_elements)
{
if (query.is_revoke)
user->access.revoke(element);
else
user->access.grant(element);
}
}
}
else
{
/// By default all databases are accessible
/// and the user can grant everything he has.
user->access.grantWithGrantOption(AccessType::ALL);
@ -226,24 +294,22 @@ namespace
user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG, dictionary);
}
bool access_management = config.getBool(user_config + ".access_management", false);
if (!access_management)
{
user->access.revoke(AccessType::ACCESS_MANAGEMENT);
user->access.revokeGrantOption(AccessType::ALL);
}
bool named_collection_control = config.getBool(user_config + ".named_collection_control", false);
if (!named_collection_control)
{
user->access.revoke(AccessType::NAMED_COLLECTION_CONTROL);
}
bool show_named_collections_secrets = config.getBool(user_config + ".show_named_collections_secrets", false);
if (!show_named_collections_secrets)
{
user->access.revoke(AccessType::SHOW_NAMED_COLLECTIONS_SECRETS);
}
}
String default_database = config.getString(user_config + ".default_database", "");
user->default_database = default_database;
@ -252,7 +318,11 @@ namespace
}
std::vector<AccessEntityPtr> parseUsers(const Poco::Util::AbstractConfiguration & config, const std::unordered_set<UUID> & allowed_profile_ids, bool allow_no_password, bool allow_plaintext_password)
std::vector<AccessEntityPtr> parseUsers(
const Poco::Util::AbstractConfiguration & config,
const std::unordered_set<UUID> & allowed_profile_ids,
bool allow_no_password,
bool allow_plaintext_password)
{
Poco::Util::AbstractConfiguration::Keys user_names;
config.keys("users", user_names);

View File

@ -14,11 +14,29 @@ AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData>(name, argument_types, parameters, settings));
}
template <bool RespectNulls = false>
AggregateFunctionPtr createAggregateFunctionNullableAny(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(
createAggregateFunctionSingleNullableValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData, RespectNulls>(
name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData>(name, argument_types, parameters, settings));
}
template <bool RespectNulls = false>
AggregateFunctionPtr createAggregateFunctionNullableAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleNullableValue<
AggregateFunctionsSingleValue,
AggregateFunctionAnyLastData,
RespectNulls>(name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyHeavyData>(name, argument_types, parameters, settings));
@ -38,9 +56,15 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
factory.registerFunction("first_value",
{ createAggregateFunctionAny, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("first_value_respect_nulls",
{ createAggregateFunctionNullableAny<true>, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("last_value",
{ createAggregateFunctionAnyLast, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("last_value_respect_nulls",
{ createAggregateFunctionNullableAnyLast<true>, properties },
AggregateFunctionFactory::CaseInsensitive);
}
}
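The behavioral difference between the plain and `_respect_nulls` variants shows up on nullable input (a sketch):

```sql
SELECT
    first_value(x),               -- skips NULLs: returns 1
    first_value_respect_nulls(x)  -- keeps NULLs: returns NULL
FROM values('x Nullable(UInt8)', NULL, 1, 2);
```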

View File

@ -768,19 +768,23 @@ static_assert(
/// For any other value types.
template <bool IS_NULLABLE = false>
struct SingleValueDataGeneric
{
private:
using Self = SingleValueDataGeneric;
Field value;
bool has_value = false;
public:
static constexpr bool is_nullable = false;
static constexpr bool is_nullable = IS_NULLABLE;
static constexpr bool is_any = false;
bool has() const
{
if constexpr (is_nullable)
return has_value;
return !value.isNull();
}
@ -815,11 +819,15 @@ public:
void change(const IColumn & column, size_t row_num, Arena *)
{
column.get(row_num, value);
if constexpr (is_nullable)
has_value = true;
}
void change(const Self & to, Arena *)
{
value = to.value;
if constexpr (is_nullable)
has_value = true;
}
bool changeFirstTime(const IColumn & column, size_t row_num, Arena * arena)
@ -835,7 +843,7 @@ public:
bool changeFirstTime(const Self & to, Arena * arena)
{
if (!has() && to.has())
if (!has() && (is_nullable || to.has()))
{
change(to, arena);
return true;
@ -869,6 +877,20 @@ public:
return true;
}
else
{
if constexpr (is_nullable)
{
Field new_value;
column.get(row_num, new_value);
if (!value.isNull() && (new_value.isNull() || new_value < value))
{
value = new_value;
return true;
}
else
return false;
}
else
{
Field new_value;
column.get(row_num, new_value);
@ -881,10 +903,29 @@ public:
return false;
}
}
}
bool changeIfLess(const Self & to, Arena * arena)
{
if (to.has() && (!has() || to.value < value))
if (!to.has())
return false;
if constexpr (is_nullable)
{
if (!has())
{
change(to, arena);
return true;
}
if (to.value.isNull() || (!value.isNull() && to.value < value))
{
value = to.value;
return true;
}
return false;
}
else
{
if (!has() || to.value < value)
{
change(to, arena);
return true;
@ -892,6 +933,7 @@ public:
else
return false;
}
}
bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena)
{
@ -901,6 +943,19 @@ public:
return true;
}
else
{
if constexpr (is_nullable)
{
Field new_value;
column.get(row_num, new_value);
if (!value.isNull() && (new_value.isNull() || value < new_value))
{
value = new_value;
return true;
}
return false;
}
else
{
Field new_value;
column.get(row_num, new_value);
@ -913,10 +968,24 @@ public:
return false;
}
}
}
bool changeIfGreater(const Self & to, Arena * arena)
{
if (to.has() && (!has() || to.value > value))
if (!to.has())
return false;
if constexpr (is_nullable)
{
if (!value.isNull() && (to.value.isNull() || value < to.value))
{
value = to.value;
return true;
}
return false;
}
else
{
if (!has() || to.value > value)
{
change(to, arena);
return true;
@ -924,6 +993,7 @@ public:
else
return false;
}
}
bool isEqualTo(const IColumn & column, size_t row_num) const
{
@ -1359,6 +1429,17 @@ public:
this->data(place).insertResultInto(to);
}
AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr & nested_function,
const DataTypes & /*arguments*/,
const Array & /*params*/,
const AggregateFunctionProperties & /*properties*/) const override
{
if (Data::is_nullable)
return nested_function;
return nullptr;
}
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override

View File

@ -9,7 +9,6 @@
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
namespace DB
{
struct Settings;
@ -22,7 +21,6 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na
assertUnary(name, argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) return new AggregateFunctionTemplate<Data<SingleValueDataFixed<TYPE>>>(argument_type); /// NOLINT
@ -46,7 +44,28 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na
if (which.idx == TypeIndex::String)
return new AggregateFunctionTemplate<Data<SingleValueDataString>>(argument_type);
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric>>(argument_type);
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<>>>(argument_type);
}
template <template <typename> class AggregateFunctionTemplate, template <typename> class Data, bool RespectNulls = false>
static IAggregateFunction * createAggregateFunctionSingleNullableValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
// If the result value could be NULL (not counting the case where no rows matched),
// use SingleValueDataGeneric.
if constexpr (!RespectNulls)
{
return createAggregateFunctionSingleValue<AggregateFunctionTemplate, Data>(name, argument_types, Array(), settings);
}
else
{
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<true>>>(argument_type);
}
UNREACHABLE();
}
@ -79,7 +98,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMaxSecond(const DataTyp
if (which.idx == TypeIndex::String)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataString>>>(res_type, val_type);
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric>>>(res_type, val_type);
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric<>>>>(res_type, val_type);
}
template <template <typename> class MinMaxData>
@ -115,7 +134,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMax(const String & name
if (which.idx == TypeIndex::String)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataString>(res_type, val_type);
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric>(res_type, val_type);
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric<>>(res_type, val_type);
}
}

View File

@ -162,14 +162,13 @@ private:
class PushOrVisitor
{
public:
PushOrVisitor(ContextPtr context, size_t max_atoms_, size_t num_atoms_)
PushOrVisitor(ContextPtr context, size_t max_atoms_)
: max_atoms(max_atoms_)
, num_atoms(num_atoms_)
, and_resolver(FunctionFactory::instance().get("and", context))
, or_resolver(FunctionFactory::instance().get("or", context))
{}
bool visit(QueryTreeNodePtr & node)
bool visit(QueryTreeNodePtr & node, size_t num_atoms)
{
if (max_atoms && num_atoms > max_atoms)
return false;
@ -187,7 +186,10 @@ public:
{
auto & arguments = function_node->getArguments().getNodes();
for (auto & argument : arguments)
visit(argument);
{
if (!visit(argument, num_atoms))
return false;
}
}
if (name == "or")
@ -217,7 +219,7 @@ public:
auto rhs = createFunctionNode(or_resolver, std::move(other_node), std::move(and_function_arguments[1]));
node = createFunctionNode(and_resolver, std::move(lhs), std::move(rhs));
visit(node);
return visit(node, num_atoms);
}
return true;
@ -225,7 +227,6 @@ public:
private:
size_t max_atoms;
size_t num_atoms;
const FunctionOverloadResolverPtr and_resolver;
const FunctionOverloadResolverPtr or_resolver;
@ -516,8 +517,8 @@ std::optional<CNF> CNF::tryBuildCNF(const QueryTreeNodePtr & node, ContextPtr co
visitor.visit(node_cloned, false);
}
if (PushOrVisitor visitor(context, max_atoms, atom_count);
!visitor.visit(node_cloned))
if (PushOrVisitor visitor(context, max_atoms);
!visitor.visit(node_cloned, atom_count))
return std::nullopt;
CollectGroupsVisitor collect_visitor;

View File

@ -214,14 +214,14 @@ int IBridge::main(const std::vector<std::string> & /*args*/)
Poco::Net::ServerSocket socket;
auto address = socketBindListen(socket, hostname, port, log);
socket.setReceiveTimeout(http_timeout);
socket.setSendTimeout(http_timeout);
socket.setReceiveTimeout(Poco::Timespan(http_timeout, 0));
socket.setSendTimeout(Poco::Timespan(http_timeout, 0));
Poco::ThreadPool server_pool(3, max_server_connections);
Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
http_params->setTimeout(http_timeout);
http_params->setKeepAliveTimeout(keep_alive_timeout);
http_params->setTimeout(Poco::Timespan(http_timeout, 0));
http_params->setKeepAliveTimeout(Poco::Timespan(keep_alive_timeout, 0));
auto shared_context = Context::createShared();
auto context = Context::createGlobal(shared_context.get());

View File

@ -353,6 +353,10 @@ target_link_libraries(clickhouse_common_io
Poco::Foundation
)
if (TARGET ch_contrib::fiu)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::fiu)
endif()
if (TARGET ch_contrib::cpuid)
target_link_libraries(clickhouse_common_io PRIVATE ch_contrib::cpuid)
endif()

View File

@ -573,6 +573,13 @@ try
CompressionMethod compression_method = chooseCompressionMethod(out_file, compression_method_string);
UInt64 compression_level = 3;
if (query_with_output->is_outfile_append && compression_method != CompressionMethod::None)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Cannot append to compressed file. Please use uncompressed file or remove APPEND keyword.");
}
if (query_with_output->compression_level)
{
const auto & compression_level_node = query_with_output->compression_level->as<ASTLiteral &>();
@ -587,8 +594,14 @@ try
range.second);
}
auto flags = O_WRONLY | O_EXCL;
if (query_with_output->is_outfile_append)
flags |= O_APPEND;
else
flags |= O_CREAT;
out_file_buf = wrapWriteBufferWithCompressionMethod(
std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT),
std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, flags),
compression_method,
static_cast<int>(compression_level)
);
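With these flags, `APPEND` reuses an existing file instead of failing on O_EXCL, while appending to a compressed target throws as shown above; a sketch:

```sql
SELECT 1 INTO OUTFILE 'data.tsv';
SELECT 2 INTO OUTFILE 'data.tsv' APPEND;     -- appends to the existing file

SELECT 3 INTO OUTFILE 'data.tsv.gz' APPEND;  -- error: cannot append to a compressed file
```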

View File

@ -264,7 +264,9 @@ void ColumnFunction::appendArgument(const ColumnWithTypeAndName & column)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot capture column {} because it has incompatible type: "
"got {}, but {} is expected.", argument_types.size(), column.type->getName(), argument_types[index]->getName());
captured_columns.push_back(column);
auto captured_column = column;
captured_column.column = captured_column.column->convertToFullColumnIfSparse();
captured_columns.push_back(std::move(captured_column));
}
DataTypePtr ColumnFunction::getResultType() const

View File

@ -1,30 +0,0 @@
#include <Common/Documentation.h>
namespace DB
{
std::string Documentation::examplesAsString() const
{
std::string res;
for (const auto & [example_name, example_query] : examples)
{
res += example_name + ":\n\n";
res += "```sql\n";
res += example_query + "\n";
res += "```\n";
}
return res;
}
std::string Documentation::categoriesAsString() const
{
if (categories.empty())
return "";
std::string res = categories[0];
for (size_t i = 1; i < categories.size(); ++i)
res += ", " + categories[i];
return res;
}
}

src/Common/FailPoint.cpp (new file, 166 lines)
View File

@ -0,0 +1,166 @@
#include <Common/Exception.h>
#include <Common/FailPoint.h>
#include <boost/core/noncopyable.hpp>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <optional>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
};
#if FIU_ENABLE
static struct InitFiu
{
InitFiu()
{
fiu_init(0);
}
} init_fiu;
#endif
/// We should define different types of failpoints here. There are four types of them:
/// - ONCE: the failpoint will only be triggered once.
/// - REGULAR: the failpoint will always be triggered until disableFailPoint is called.
/// - PAUSEABLE_ONCE: the failpoint will be blocked one time when pauseFailPoint is called, until disableFailPoint is called.
/// - PAUSEABLE: the failpoint will be blocked every time when pauseFailPoint is called, until disableFailPoint is called.
#define APPLY_FOR_FAILPOINTS(ONCE, REGULAR, PAUSEABLE_ONCE, PAUSEABLE) \
ONCE(replicated_merge_tree_commit_zk_fail_after_op) \
REGULAR(dummy_failpoint) \
PAUSEABLE_ONCE(dummy_pausable_failpoint_once) \
PAUSEABLE(dummy_pausable_failpoint)
namespace FailPoints
{
#define M(NAME) extern const char(NAME)[] = #NAME "";
APPLY_FOR_FAILPOINTS(M, M, M, M)
#undef M
}
std::unordered_map<String, std::shared_ptr<FailPointChannel>> FailPointInjection::fail_point_wait_channels;
std::mutex FailPointInjection::mu;
class FailPointChannel : private boost::noncopyable
{
public:
explicit FailPointChannel(UInt64 timeout_)
: timeout_ms(timeout_)
{}
FailPointChannel()
: timeout_ms(0)
{}
void wait()
{
std::unique_lock lock(m);
if (timeout_ms == 0)
cv.wait(lock);
else
cv.wait_for(lock, std::chrono::milliseconds(timeout_ms));
}
void notifyAll()
{
std::unique_lock lock(m);
cv.notify_all();
}
private:
UInt64 timeout_ms;
std::mutex m;
std::condition_variable cv;
};
void FailPointInjection::enablePauseFailPoint(const String & fail_point_name, UInt64 time_ms)
{
#define SUB_M(NAME, flags) \
if (fail_point_name == FailPoints::NAME) \
{ \
/* FIU_ONETIME -- Only fail once; the point of failure will be automatically disabled afterwards.*/ \
fiu_enable(FailPoints::NAME, 1, nullptr, flags); \
std::lock_guard lock(mu); \
fail_point_wait_channels.try_emplace(FailPoints::NAME, std::make_shared<FailPointChannel>(time_ms)); \
return; \
}
#define ONCE(NAME)
#define REGULAR(NAME)
#define PAUSEABLE_ONCE(NAME) SUB_M(NAME, FIU_ONETIME)
#define PAUSEABLE(NAME) SUB_M(NAME, 0)
APPLY_FOR_FAILPOINTS(ONCE, REGULAR, PAUSEABLE_ONCE, PAUSEABLE)
#undef SUB_M
#undef ONCE
#undef REGULAR
#undef PAUSEABLE_ONCE
#undef PAUSEABLE
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find fail point {}", fail_point_name);
}
void FailPointInjection::pauseFailPoint(const String & fail_point_name)
{
fiu_do_on(fail_point_name.c_str(), FailPointInjection::wait(fail_point_name););
}
void FailPointInjection::enableFailPoint(const String & fail_point_name)
{
#if FIU_ENABLE
#define SUB_M(NAME, flags, pause) \
if (fail_point_name == FailPoints::NAME) \
{ \
/* FIU_ONETIME -- Only fail once; the point of failure will be automatically disabled afterwards.*/ \
fiu_enable(FailPoints::NAME, 1, nullptr, flags); \
if (pause) \
{ \
std::lock_guard lock(mu); \
fail_point_wait_channels.try_emplace(FailPoints::NAME, std::make_shared<FailPointChannel>()); \
} \
return; \
}
#define ONCE(NAME) SUB_M(NAME, FIU_ONETIME, 0)
#define REGULAR(NAME) SUB_M(NAME, 0, 0)
#define PAUSEABLE_ONCE(NAME) SUB_M(NAME, FIU_ONETIME, 1)
#define PAUSEABLE(NAME) SUB_M(NAME, 0, 1)
APPLY_FOR_FAILPOINTS(ONCE, REGULAR, PAUSEABLE_ONCE, PAUSEABLE)
#undef SUB_M
#undef ONCE
#undef REGULAR
#undef PAUSEABLE_ONCE
#undef PAUSEABLE
#endif
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find fail point {}", fail_point_name);
}
void FailPointInjection::disableFailPoint(const String & fail_point_name)
{
std::lock_guard lock(mu);
if (auto iter = fail_point_wait_channels.find(fail_point_name); iter != fail_point_wait_channels.end())
{
/// We cannot rely on the destructor to do the notify_all, because
/// if someone is waiting on this channel, the destructor will never be called.
iter->second->notifyAll();
fail_point_wait_channels.erase(iter);
}
fiu_disable(fail_point_name.c_str());
}
void FailPointInjection::wait(const String & fail_point_name)
{
std::unique_lock lock(mu);
if (auto iter = fail_point_wait_channels.find(fail_point_name); iter == fail_point_wait_channels.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not find channel for fail point {}", fail_point_name);
else
{
lock.unlock();
auto ptr = iter->second;
ptr->wait();
}
};
}

src/Common/FailPoint.h (new file, 53 lines)
View File

@ -0,0 +1,53 @@
#pragma once
#include "config.h"
#include <Common/Exception.h>
#include <Core/Types.h>
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdocumentation"
#pragma clang diagnostic ignored "-Wreserved-macro-identifier"
#endif
#include <fiu.h>
#include <fiu-control.h>
#ifdef __clang__
#pragma clang diagnostic pop
#endif
#include <any>
#include <unordered_map>
namespace DB
{
/// This is a simple named failpoint library inspired by https://github.com/pingcap/tiflash
/// The usage is simple:
/// 1. define failpoint with a 'failpoint_name' in FailPoint.cpp
/// 2. inject failpoint in normal code
/// 2.1 use fiu_do_on which can inject any code blocks, when it is a regular-triggered / once-triggered failpoint
/// 2.2 use pauseFailPoint when it is a pausable failpoint
/// 3. in test file, we can use system failpoint enable/disable 'failpoint_name'
class FailPointChannel;
class FailPointInjection
{
public:
static void pauseFailPoint(const String & fail_point_name);
static void enableFailPoint(const String & fail_point_name);
static void enablePauseFailPoint(const String & fail_point_name, UInt64 time);
static void disableFailPoint(const String & fail_point_name);
static void wait(const String & fail_point_name);
private:
static std::mutex mu;
static std::unordered_map<String, std::shared_ptr<FailPointChannel>> fail_point_wait_channels;
};
}
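Step 3 above maps to the new `SYSTEM_FAILPOINT` commands introduced earlier in this commit; a sketch using the `dummy_failpoint` defined in FailPoint.cpp:

```sql
SYSTEM ENABLE FAILPOINT dummy_failpoint;
-- ... run statements whose code path contains fiu_do_on(FailPoints::dummy_failpoint, ...) ...
SYSTEM DISABLE FAILPOINT dummy_failpoint;
```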

View File

@ -0,0 +1,44 @@
#include <Common/FunctionDocumentation.h>
namespace DB
{
std::string FunctionDocumentation::argumentsAsString() const
{
std::string res;
for (const auto & [name, desc] : arguments)
{
res += "- " + name + ":" + desc + "\n";
}
return res;
}
std::string FunctionDocumentation::examplesAsString() const
{
std::string res;
for (const auto & [name, query, result] : examples)
{
res += name + ":\n\n";
res += "``` sql\n";
res += query + "\n";
res += "```\n\n";
res += "``` text\n";
res += result + "\n";
res += "```\n";
}
return res;
}
std::string FunctionDocumentation::categoriesAsString() const
{
if (categories.empty())
return "";
auto it = categories.begin();
std::string res = *it;
for (++it; it != categories.end(); ++it) /// start past the first category, which is already in res
res += ", " + *it;
return res;
}
}

View File

@ -1,15 +1,14 @@
#pragma once
#include <set>
#include <string>
#include <vector>
#include <map>
namespace DB
{
/** Embedded reference documentation for high-level server components,
* such as SQL functions, table functions, data types, table engines, etc.
/** Embedded reference documentation for functions.
*
* The advantages of embedded documentation are:
* - it is easy to write and update with code;
@ -34,50 +33,49 @@ namespace DB
* - examples (queries that can be referenced from the text by names);
* - categories - one or a few text strings like {"Mathematical", "Array Processing"};
*
* Only the description is mandatory.
*
* The description should be represented in Markdown (or just plaintext).
* Some extensions for Markdown are added:
* - [example:name] will reference to an example with the corresponding name.
*
* Documentation does not support multiple languages.
* The only available language is English.
*
* TODO: Allow to specify Syntax, Argument(s) and a Returned Value.
* TODO: Organize Examples as a struct of ExampleName, ExampleQuery and ExampleResult.
*/
struct Documentation
struct FunctionDocumentation
{
using Description = std::string;
using Syntax = std::string;
using Argument = std::string;
struct Argument
{
std::string name;
std::string description;
};
using Arguments = std::vector<Argument>;
using ReturnedValue = std::string;
using ExampleName = std::string;
using ExampleQuery = std::string;
using Examples = std::map<ExampleName, ExampleQuery>;
struct Example
{
std::string name;
std::string query;
std::string result;
};
using Examples = std::vector<Example>;
using Category = std::string;
using Categories = std::vector<Category>;
using Categories = std::set<Category>;
using Related = std::string;
Description description;
Examples examples;
Categories categories;
Documentation(Description description_) : description(std::move(description_)) {} /// NOLINT
Documentation(Description description_, Examples examples_) : description(std::move(description_)), examples(std::move(examples_)) {}
Documentation(Description description_, Examples examples_, Categories categories_)
: description(std::move(description_)), examples(std::move(examples_)), categories(std::move(categories_)) {}
/// TODO: Please remove this constructor. Documentation should always be non-empty.
Documentation() = default;
Description description; /// E.g. "Returns the position (in bytes, starting at 1) of a substring needle in a string haystack."
Syntax syntax; /// E.g. "position(haystack, needle)"
Arguments arguments; /// E.g. ["haystack — String in which the search is performed. String.", "needle — Substring to be searched. String."]
ReturnedValue returned_value; /// E.g. "Starting position in bytes and counting from 1, if the substring was found."
Examples examples; ///
Categories categories; /// E.g. {"String Search"}
std::string argumentsAsString() const;
std::string examplesAsString() const;
std::string categoriesAsString() const;
};

View File

@ -386,8 +386,6 @@ protected:
FallbackSearcher fallback_searcher;
public:
using Searcher = FallbackSearcher;
/** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
* If you specify it small enough, the fallback algorithm will be used,
* since it is considered that it's useless to waste time initializing the hash table.
@ -729,7 +727,7 @@ public:
using Volnitsky = VolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
using VolnitskyUTF8 = VolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>; /// exactly same as Volnitsky
using VolnitskyUTF8 = VolnitskyBase<true, false, UTF8CaseSensitiveStringSearcher>;
using VolnitskyCaseInsensitive = VolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>; /// ignores non-ASCII bytes
using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;
@ -737,7 +735,7 @@ using VolnitskyCaseSensitiveToken = VolnitskyBase<true, true, ASCIICaseSensitive
using VolnitskyCaseInsensitiveToken = VolnitskyBase<false, true, ASCIICaseInsensitiveTokenSearcher>;
using MultiVolnitsky = MultiVolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
using MultiVolnitskyUTF8 = MultiVolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>;
using MultiVolnitskyUTF8 = MultiVolnitskyBase<true, false, UTF8CaseSensitiveStringSearcher>;
using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>;
using MultiVolnitskyCaseInsensitiveUTF8 = MultiVolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;

View File

@ -19,18 +19,27 @@ namespace ErrorCodes
class RandomFaultInjection
{
public:
bool must_fail_after_op = false;
bool must_fail_before_op = false;
RandomFaultInjection(double probability, UInt64 seed_) : rndgen(seed_), distribution(probability) { }
void beforeOperation()
{
if (distribution(rndgen))
if (distribution(rndgen) || must_fail_before_op)
{
must_fail_before_op = false;
throw zkutil::KeeperException("Fault injection before operation", Coordination::Error::ZSESSIONEXPIRED);
}
}
void afterOperation()
{
if (distribution(rndgen))
if (distribution(rndgen) || must_fail_after_op)
{
must_fail_after_op = false;
throw zkutil::KeeperException("Fault injection after operation", Coordination::Error::ZOPERATIONTIMEOUT);
}
}
private:
std::mt19937_64 rndgen;
@ -42,6 +51,9 @@ private:
///
class ZooKeeperWithFaultInjection
{
template<bool async_insert>
friend class ReplicatedMergeTreeSinkImpl;
using zk = zkutil::ZooKeeper;
zk::Ptr keeper;
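The `must_fail_*` flags give `ReplicatedMergeTreeSinkImpl` a deterministic failure hook; from SQL, the probabilistic injection is driven by the `insert_keeper_fault_injection_*` settings (a sketch, assuming a ReplicatedMergeTree table `t`):

```sql
INSERT INTO t
SETTINGS insert_keeper_fault_injection_probability = 0.05,
         insert_keeper_fault_injection_seed = 42
VALUES (1);
```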

View File

@ -42,7 +42,7 @@ namespace ErrorCodes
#define __NR_renameat2 316
#elif defined(__aarch64__)
#define __NR_renameat2 276
#elif defined(__ppc64__)
#elif defined(__powerpc64__)
#define __NR_renameat2 357
#elif defined(__riscv)
#define __NR_renameat2 276

View File

@ -57,4 +57,5 @@
#cmakedefine01 USE_SKIM
#cmakedefine01 USE_OPENSSL_INTREE
#cmakedefine01 USE_ULID
#cmakedefine01 FIU_ENABLE
#cmakedefine01 USE_BCRYPT

View File

@ -17,18 +17,18 @@ namespace Format
{
using IndexPositions = PODArrayWithStackMemory<UInt64, 64>;
static inline void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res, UInt64 argument_number)
static inline UInt64 parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 argument_number)
{
res = 0;
UInt64 res = 0;
for (UInt64 pos = l; pos < r; ++pos)
{
if (!isNumericASCII(description[pos]))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a number in curly braces at position {}", std::to_string(pos));
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a number in curly braces at position {}", pos);
res = res * 10 + description[pos] - '0';
if (res >= argument_number)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Too big number for arguments, must be at most {}",
argument_number - 1);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Too big number for arguments, must be at most {}", argument_number - 1);
}
return res;
}
static inline void init(
@ -132,8 +132,7 @@ namespace Format
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot switch from automatic field numbering to manual field specification");
is_plain_numbering = false;
UInt64 arg;
parseNumber(pattern, last_open, i, arg, argument_number);
UInt64 arg = parseNumber(pattern, last_open, i, argument_number);
if (arg >= argument_number)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument is too big for formatting. Note that indexing starts from zero");

View File

@ -56,8 +56,17 @@ static bool check2()
{
ThreadPool pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, 2);
try
{
pool.scheduleOrThrowOnError([&]{ throw std::runtime_error("Hello, world!"); });
pool.scheduleOrThrowOnError([]{});
}
catch (const std::runtime_error &)
{
/// Sometimes exception may be thrown from schedule.
/// Just retry test in that case.
return true;
}
try
{

View File

@ -41,7 +41,7 @@ enum PollPidResult
#define SYS_pidfd_open 434
#elif defined(__aarch64__)
#define SYS_pidfd_open 434
#elif defined(__ppc64__)
#elif defined(__powerpc64__)
#define SYS_pidfd_open 434
#elif defined(__riscv)
#define SYS_pidfd_open 434

View File

@ -501,9 +501,11 @@ void BaseSettings<TTraits>::read(ReadBuffer & in, SettingsWriteFormat format)
const auto & accessor = Traits::Accessor::instance();
while (true)
{
String name = BaseSettingsHelpers::readString(in);
if (name.empty() /* empty string is a marker of the end of settings */)
String read_name = BaseSettingsHelpers::readString(in);
if (read_name.empty() /* empty string is a marker of the end of settings */)
break;
std::string_view name = TTraits::resolveName(read_name);
size_t index = accessor.find(name);
using Flags = BaseSettingsHelpers::Flags;

View File

@ -31,7 +31,7 @@ namespace Authentication
static const size_t SCRAMBLE_LENGTH = 20;
/** Generate a random string using ASCII characters but avoid separator character,
* produce pseudo random numbers between with about 7 bit worth of entropty between 1-127.
* produce pseudo random numbers with about 7 bits worth of entropy, between 1-127.
* https://github.com/mysql/mysql-server/blob/8.0/mysys/crypt_genhash_impl.cc#L427
*/
static String generateScramble()

View File

@ -560,6 +560,7 @@ class IColumn;
M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
M(Bool, optimize_use_projections, true, "Automatically choose projections to perform SELECT query", 0) \
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
M(Bool, async_query_sending_for_remote, true, "Asynchronously create connections and send query to shards in remote query", 0) \
@ -641,7 +642,7 @@ class IColumn;
M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \
\
M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \
M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \
M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, io_uring, pread_threadpool. The 'io_uring' method is experimental and does not work for Log, TinyLog, StripeLog, File, Set and Join, and other tables with append-able files in presence of concurrent reads and writes.", 0) \
M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \
M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \
@ -715,26 +716,12 @@ class IColumn;
M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. This setting is used for testing purposes and not meant to be changed by users.", 0) \
\
M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \
\
/** Experimental functions */ \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
M(String, ann_index_select_query_params, "", "Parameters passed to ANN indexes in SELECT queries, the format is 'param1=x, param2=y, ...'", 0) \
M(UInt64, max_limit_for_ann_queries, 1000000, "Maximum limit value for using ANN indexes is used to prevent memory overflow in search queries for indexes", 0) \
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
M(Bool, count_distinct_optimization, false, "Rewrite count distinct to subquery of group by", 0) \
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \
M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \
M(Bool, throw_if_no_data_to_insert, true, "Enables or disables empty INSERTs, enabled by default", 0) \
M(Bool, compatibility_ignore_auto_increment_in_create_table, false, "Ignore AUTO_INCREMENT keyword in column declaration if true, otherwise return error. It simplifies migration from MySQL", 0) \
M(Bool, multiple_joins_try_to_keep_original_names, false, "Do not add aliases to top level expression list on multiple joins rewrite", 0) \
M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
M(Bool, optimize_distinct_in_order, false, "This optimization has a bug and it is disabled. Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
M(Bool, optimize_sorting_by_input_stream_properties, true, "Optimize sorting by sorting properties of input stream", 0) \
M(UInt64, insert_keeper_max_retries, 20, "Max retries for keeper operations during insert", 0) \
M(UInt64, insert_keeper_retry_initial_backoff_ms, 100, "Initial backoff timeout for keeper operations during insert", 0) \
@ -743,10 +730,24 @@ class IColumn;
M(UInt64, insert_keeper_fault_injection_seed, 0, "0 - random seed, otherwise the setting value", 0) \
M(Bool, force_aggregation_in_order, false, "Force use of aggregation in order on remote nodes during distributed aggregation. PLEASE, NEVER CHANGE THIS SETTING VALUE MANUALLY!", IMPORTANT) \
M(UInt64, http_max_request_param_data_size, 10_MiB, "Limit on size of request data used as a query parameter in predefined HTTP requests.", 0) \
M(Bool, function_json_value_return_type_allow_nullable, false, "Allow function JSON_VALUE to return nullable type.", 0) \
M(Bool, function_json_value_return_type_allow_complex, false, "Allow function JSON_VALUE to return complex type, such as: struct, array, map.", 0) \
\
/** Experimental functions */ \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
M(UInt64, max_limit_for_ann_queries, 1000000, "Maximum limit value for using ANN indexes is used to prevent memory overflow in search queries for indexes", 0) \
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \
M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \
M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
M(Bool, function_json_value_return_type_allow_nullable, false, "Allow function to return nullable type.", 0) \
M(Bool, function_json_value_return_type_allow_complex, false, "Allow function to return complex type, such as: struct, array, map.", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
@ -972,7 +973,6 @@ class IColumn;
M(Bool, output_format_bson_string_as_string, false, "Use BSON String type instead of Binary for String columns.", 0) \
M(Bool, input_format_bson_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format BSON.", 0) \
\
M(Bool, regexp_dict_allow_other_sources, false, "Allow regexp_tree dictionary to use sources other than yaml source.", 0) \
M(Bool, format_display_secrets_in_show_and_select, false, "Do not hide secrets in SHOW and SELECT queries.", IMPORTANT) \
M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
\
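The relocated `function_json_value_return_type_*` settings gate what `JSON_VALUE` may return; a sketch (exact results depend on the JSON input):

```sql
SET function_json_value_return_type_allow_nullable = 1;
SELECT JSON_VALUE('{"hello": null}', '$.hello');          -- may now return NULL

SET function_json_value_return_type_allow_complex = 1;
SELECT JSON_VALUE('{"hello": {"world": 1}}', '$.hello');  -- may now return the nested object
```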

View File

@ -32,9 +32,9 @@ void SerializationArray::serializeBinary(const Field & field, WriteBuffer & ostr
{
const Array & a = field.get<const Array &>();
writeVarUInt(a.size(), ostr);
for (size_t i = 0; i < a.size(); ++i)
for (const auto & i : a)
{
nested->serializeBinary(a[i], ostr, settings);
nested->serializeBinary(i, ostr, settings);
}
}

View File

@ -246,7 +246,8 @@ void SerializationInfoByName::writeJSON(WriteBuffer & out) const
return writeString(oss.str(), out);
}
void SerializationInfoByName::readJSON(ReadBuffer & in)
SerializationInfoByName SerializationInfoByName::readJSON(
const NamesAndTypesList & columns, const Settings & settings, ReadBuffer & in)
{
String json_str;
readString(json_str, in);
@ -262,8 +263,13 @@ void SerializationInfoByName::readJSON(ReadBuffer & in)
"Unknown version of serialization infos ({}). Should be less or equal than {}",
object->getValue<size_t>(KEY_VERSION), SERIALIZATION_INFO_VERSION);
SerializationInfoByName infos;
if (object->has(KEY_COLUMNS))
{
std::unordered_map<std::string_view, const IDataType *> column_type_by_name;
for (const auto & [name, type] : columns)
column_type_by_name.emplace(name, type.get());
auto array = object->getArray(KEY_COLUMNS);
for (const auto & elem : *array)
{
@ -271,13 +277,22 @@ void SerializationInfoByName::readJSON(ReadBuffer & in)
if (!elem_object->has(KEY_NAME))
throw Exception(ErrorCodes::CORRUPTED_DATA,
"Missed field '{}' in SerializationInfo of columns", KEY_NAME);
"Missed field '{}' in serialization infos", KEY_NAME);
auto name = elem_object->getValue<String>(KEY_NAME);
if (auto it = find(name); it != end())
it->second->fromJSON(*elem_object);
}
auto it = column_type_by_name.find(name);
if (it == column_type_by_name.end())
throw Exception(ErrorCodes::CORRUPTED_DATA,
"Found unexpected column '{}' in serialization infos", name);
auto info = it->second->createSerializationInfo(settings);
info->fromJSON(*elem_object);
infos.emplace(name, std::move(info));
}
}
return infos;
}
}

View File

@ -96,8 +96,10 @@ using MutableSerializationInfos = std::vector<MutableSerializationInfoPtr>;
class SerializationInfoByName : public std::map<String, MutableSerializationInfoPtr>
{
public:
using Settings = SerializationInfo::Settings;
SerializationInfoByName() = default;
SerializationInfoByName(const NamesAndTypesList & columns, const SerializationInfo::Settings & settings);
SerializationInfoByName(const NamesAndTypesList & columns, const Settings & settings);
void add(const Block & block);
void add(const SerializationInfoByName & other);
@ -108,7 +110,9 @@ public:
void replaceData(const SerializationInfoByName & other);
void writeJSON(WriteBuffer & out) const;
void readJSON(ReadBuffer & in);
static SerializationInfoByName readJSON(
const NamesAndTypesList & columns, const Settings & settings, ReadBuffer & in);
};
}

View File

@ -67,7 +67,7 @@ DatabaseMySQL::DatabaseMySQL(
try
{
/// Test that the database is working fine; it will also fetch tables.
empty();
empty(); // NOLINT(bugprone-standalone-empty)
}
catch (...)
{

View File

@ -71,11 +71,11 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(
: update_time{std::chrono::system_clock::from_time_t(0)}
, dict_struct{dict_struct_}
, configuration{configuration_}
, query_builder{dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks}
, query_builder(std::make_shared<ExternalQueryBuilder>(dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks))
, sample_block{sample_block_}
, context(context_)
, pool{createPool(configuration)}
, load_all_query{query_builder.composeLoadAllQuery()}
, load_all_query{query_builder->composeLoadAllQuery()}
{
}
@ -84,7 +84,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionar
, dict_struct{other.dict_struct}
, configuration{other.configuration}
, invalidate_query_response{other.invalidate_query_response}
, query_builder{dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks}
, query_builder(std::make_shared<ExternalQueryBuilder>(dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks))
, sample_block{other.sample_block}
, context(Context::createCopy(other.context))
, pool{createPool(configuration)}
@ -99,12 +99,12 @@ std::string ClickHouseDictionarySource::getUpdateFieldAndDate()
time_t hr_time = std::chrono::system_clock::to_time_t(update_time) - configuration.update_lag;
std::string str_time = DateLUT::instance().timeToString(hr_time);
update_time = std::chrono::system_clock::now();
return query_builder.composeUpdateQuery(configuration.update_field, str_time);
return query_builder->composeUpdateQuery(configuration.update_field, str_time);
}
else
{
update_time = std::chrono::system_clock::now();
return query_builder.composeLoadAllQuery();
return query_builder->composeLoadAllQuery();
}
}
@ -121,13 +121,13 @@ QueryPipeline ClickHouseDictionarySource::loadUpdatedAll()
QueryPipeline ClickHouseDictionarySource::loadIds(const std::vector<UInt64> & ids)
{
return createStreamForQuery(query_builder.composeLoadIdsQuery(ids));
return createStreamForQuery(query_builder->composeLoadIdsQuery(ids));
}
QueryPipeline ClickHouseDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
{
String query = query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES);
String query = query_builder->composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES);
return createStreamForQuery(query);
}

View File

@ -78,11 +78,11 @@ private:
const DictionaryStructure dict_struct;
const Configuration configuration;
mutable std::string invalidate_query_response;
ExternalQueryBuilder query_builder;
ExternalQueryBuilderPtr query_builder;
Block sample_block;
ContextMutablePtr context;
ConnectionPoolWithFailoverPtr pool;
const std::string load_all_query;
std::string load_all_query;
Poco::Logger * log = &Poco::Logger::get("ClickHouseDictionarySource");
/// RegExpTreeDictionary is the only dictionary whose structure of attributes differs from the input block.

View File

@ -36,6 +36,10 @@ struct ExternalQueryBuilder
const std::string & where_,
IdentifierQuotingStyle quoting_style_);
ExternalQueryBuilder(const ExternalQueryBuilder &) = default;
virtual ~ExternalQueryBuilder() = default;
/** Generate a query to load all data. */
std::string composeLoadAllQuery() const;
@ -61,10 +65,10 @@ struct ExternalQueryBuilder
std::string composeLoadKeysQuery(const Columns & key_columns, const std::vector<size_t> & requested_rows, LoadKeysMethod method, size_t partition_key_prefix = 0) const;
private:
protected:
const FormatSettings format_settings = {};
void composeLoadAllQuery(WriteBuffer & out) const;
virtual void composeLoadAllQuery(WriteBuffer & out) const;
/// In the following methods `beg` and `end` specifies which columns to write in expression
@ -93,4 +97,6 @@ private:
void writeQuoted(const std::string & s, WriteBuffer & out) const;
};
using ExternalQueryBuilderPtr = std::shared_ptr<ExternalQueryBuilder>;
}

View File

@ -20,6 +20,7 @@
#include <Functions/Regexps.h>
#include <Functions/checkHyperscanRegexp.h>
#include <QueryPipeline/QueryPipeline.h>
#include <Processors/Sources/BlocksListSource.h>
#include <Dictionaries/ClickHouseDictionarySource.h>
#include <Dictionaries/DictionaryFactory.h>
@ -86,6 +87,32 @@ namespace
}
}
struct ExternalRegexpQueryBuilder final : public ExternalQueryBuilder
{
explicit ExternalRegexpQueryBuilder(const ExternalQueryBuilder & builder) : ExternalQueryBuilder(builder) {}
void composeLoadAllQuery(WriteBuffer & out) const override
{
writeString("SELECT id, parent_id, regexp, keys, values FROM ", out);
if (!db.empty())
{
writeQuoted(db, out);
writeChar('.', out);
}
if (!schema.empty())
{
writeQuoted(schema, out);
writeChar('.', out);
}
writeQuoted(table, out);
if (!where.empty())
{
writeString(" WHERE ", out);
writeString(where, out);
}
}
};
struct RegExpTreeDictionary::RegexTreeNode
{
std::vector<UInt64> children;
@ -117,6 +144,7 @@ struct RegExpTreeDictionary::RegexTreeNode
{
Field field;
std::vector<StringPiece> pieces;
String original_value;
constexpr bool containsBackRefs() const { return !pieces.empty(); }
};
@ -208,12 +236,12 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
auto string_pieces = createStringPieces(value, num_captures, regex, logger);
if (!string_pieces.empty())
{
node->attributes[name_] = RegexTreeNode::AttributeValue{.field = values[j], .pieces = std::move(string_pieces)};
node->attributes[name_] = RegexTreeNode::AttributeValue{.field = values[j], .pieces = std::move(string_pieces), .original_value = value};
}
else
{
Field field = parseStringToField(values[j].safeGet<String>(), attr.type);
node->attributes[name_] = RegexTreeNode::AttributeValue{.field = std::move(field)};
Field field = parseStringToField(value, attr.type);
node->attributes[name_] = RegexTreeNode::AttributeValue{.field = std::move(field), .original_value = value};
}
}
}
@ -383,6 +411,8 @@ RegExpTreeDictionary::RegExpTreeDictionary(
sample_block.insert(ColumnWithTypeAndName(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), kKeys));
sample_block.insert(ColumnWithTypeAndName(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), kValues));
ch_source->sample_block = std::move(sample_block);
ch_source->query_builder = std::make_shared<ExternalRegexpQueryBuilder>(*ch_source->query_builder);
ch_source->load_all_query = ch_source->query_builder->composeLoadAllQuery();
}
loadData();
@ -651,6 +681,52 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::match(
return result;
}
Pipe RegExpTreeDictionary::read(const Names & , size_t max_block_size, size_t) const
{
auto it = regex_nodes.begin();
size_t block_size = 0;
BlocksList result;
for (;;)
{
Block block;
auto col_id = std::make_shared<DataTypeUInt64>()->createColumn();
auto col_pid = std::make_shared<DataTypeUInt64>()->createColumn();
auto col_regex = std::make_shared<DataTypeString>()->createColumn();
auto col_keys = std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())->createColumn();
auto col_values = std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())->createColumn();
for (;it != regex_nodes.end() && block_size < max_block_size; it++, block_size++)
{
col_id->insert(it->first);
const auto & node = it->second;
col_pid->insert(node->parent_id);
col_regex->insert(node->regex);
std::vector<Field> keys, values;
for (const auto & [key, attr] : node->attributes)
{
keys.push_back(key);
values.push_back(attr.original_value);
}
col_keys->insert(Array(keys.begin(), keys.end()));
col_values->insert(Array(values.begin(), values.end()));
}
block.insert(ColumnWithTypeAndName(std::move(col_id),std::make_shared<DataTypeUInt64>(),kId));
block.insert(ColumnWithTypeAndName(std::move(col_pid),std::make_shared<DataTypeUInt64>(),kParentId));
block.insert(ColumnWithTypeAndName(std::move(col_regex),std::make_shared<DataTypeString>(),kRegExp));
block.insert(ColumnWithTypeAndName(std::move(col_keys),std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()),kKeys));
block.insert(ColumnWithTypeAndName(std::move(col_values),std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()),kValues));
result.push_back(std::move(block));
if (it == regex_nodes.end())
break;
block_size = 0;
}
return Pipe(std::make_shared<BlocksListSource>(std::move(result)));
}
Columns RegExpTreeDictionary::getColumns(
const Strings & attribute_names,
const DataTypes & result_types,
@ -717,10 +793,6 @@ void registerDictionaryRegExpTree(DictionaryFactory & factory)
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
if (!context->getSettings().regexp_dict_allow_other_sources && typeid_cast<YAMLRegExpTreeDictionarySource *>(source_ptr.get()) == nullptr)
throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION,
"regexp_tree dictionary doesn't accept sources other than yaml source. "
"To active it, please set regexp_dict_allow_other_sources=true");
return std::make_unique<RegExpTreeDictionary>(dict_id, dict_struct, std::move(source_ptr), configuration, context->getSettings().regexp_dict_allow_hyperscan);
};

View File

@ -22,6 +22,8 @@
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/IDictionary.h>
#include <Storages/ColumnsDescription.h>
namespace DB
{
@ -91,10 +93,7 @@ public:
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Dictionary {} does not support method `hasKeys`", name);
}
Pipe read(const Names &, size_t, size_t) const override
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Dictionary {} does not support method `read`", name);
}
Pipe read(const Names & columns, size_t max_block_size, size_t num_streams) const override;
ColumnPtr getColumn(
const std::string & attribute_name,

View File

@ -50,7 +50,7 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile(
bool use_external_buffer_,
std::optional<size_t> read_until_position_,
std::shared_ptr<FilesystemCacheLog> cache_log_)
: ReadBufferFromFileBase(settings_.remote_fs_buffer_size, nullptr, 0, file_size_)
: ReadBufferFromFileBase(use_external_buffer_ ? 0 : settings_.remote_fs_buffer_size, nullptr, 0, file_size_)
#ifndef NDEBUG
, log(&Poco::Logger::get("CachedOnDiskReadBufferFromFile(" + source_file_path_ + ")"))
#else
@ -151,10 +151,8 @@ CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segm
/// Do not allow using the asynchronous version of LocalFSReadMethod.
local_read_settings.local_fs_method = LocalFSReadMethod::pread;
// The buffer will unnecessarily allocate a Memory of size local_fs_buffer_size, which will then
// most likely be unused because we're swap()ping our own internal_buffer into
// implementation_buffer before each read. But we can't just set local_fs_buffer_size = 0 here
// because some buffer implementations actually use that memory (e.g. for prefetching).
if (use_external_buffer)
local_read_settings.local_fs_buffer_size = 0;
auto buf = createReadBufferFromFileBase(path, local_read_settings);
@ -389,14 +387,6 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme
auto read_buffer_for_file_segment = getReadBufferForFileSegment(file_segment);
watch.stop();
current_file_segment_counters.increment(
ProfileEvents::FileSegmentWaitReadBufferMicroseconds, watch.elapsedMicroseconds());
[[maybe_unused]] auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
chassert(download_current_segment == file_segment.isDownloader());
chassert(file_segment.range() == range);
chassert(file_offset_of_buffer_end >= range.left && file_offset_of_buffer_end <= range.right);
LOG_TEST(
log,
@ -406,6 +396,15 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme
read_buffer_for_file_segment->getFileOffsetOfBufferEnd(),
file_segment.getInfoForLog());
current_file_segment_counters.increment(
ProfileEvents::FileSegmentWaitReadBufferMicroseconds, watch.elapsedMicroseconds());
[[maybe_unused]] auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
chassert(download_current_segment == file_segment.isDownloader());
chassert(file_segment.range() == range);
chassert(file_offset_of_buffer_end >= range.left && file_offset_of_buffer_end <= range.right);
read_buffer_for_file_segment->setReadUntilPosition(range.right + 1); /// [..., range.right]
switch (read_type)
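The constructor change at the top of this hunk passes a zero size to ReadBufferFromFileBase when use_external_buffer_ is set: the caller supplies its own memory and swaps it in before every read, so an internal buffer would be allocated only to sit unused. The same reasoning drives the local_fs_buffer_size = 0 branch above. A minimal sketch of the pattern, with hypothetical names:

#include <cstddef>
#include <vector>

// Hypothetical simplified reader: when the caller promises to provide an
// external buffer, the internal one is sized to zero instead of allocated.
class Reader
{
public:
    Reader(size_t buffer_size, bool use_external_buffer)
        : internal_buffer(use_external_buffer ? 0 : buffer_size)
    {
    }

    size_t internalBufferSize() const { return internal_buffer.size(); }

private:
    std::vector<char> internal_buffer;
};

int main()
{
    Reader owns_memory(1 << 20, /*use_external_buffer=*/ false);   // allocates 1 MiB
    Reader borrows_memory(1 << 20, /*use_external_buffer=*/ true); // allocates nothing
    return owns_memory.internalBufferSize() == (1 << 20)
        && borrows_memory.internalBufferSize() == 0 ? 0 : 1;
}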

View File

@ -32,7 +32,7 @@
#define SYS_preadv2 327
#elif defined(__aarch64__)
#define SYS_preadv2 286
#elif defined(__ppc64__)
#elif defined(__powerpc64__)
#define SYS_preadv2 380
#elif defined(__riscv)
#define SYS_preadv2 286
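The change above switches the PowerPC guard from __ppc64__ to __powerpc64__, presumably because GCC and Clang on Linux reliably predefine __powerpc64__ while __ppc64__ is not guaranteed there. A sketch of the guard shape; the 380 (powerpc64) and 286 (riscv) values mirror the hunk above, and the fallback is illustrative only:

#include <cstdio>

#if defined(__powerpc64__)
#    define SYS_PREADV2_SKETCH 380
#elif defined(__riscv)
#    define SYS_PREADV2_SKETCH 286
#else
#    define SYS_PREADV2_SKETCH (-1) // unknown in this sketch; real code covers more arches
#endif

int main()
{
    std::printf("preadv2 syscall number (sketch): %d\n", SYS_PREADV2_SKETCH);
}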

View File

@ -29,7 +29,7 @@ const String & getFunctionCanonicalNameIfAny(const String & name)
void FunctionFactory::registerFunction(
const std::string & name,
FunctionCreator creator,
Documentation doc,
FunctionDocumentation doc,
CaseSensitiveness case_sensitiveness)
{
if (!functions.emplace(name, FunctionFactoryData{creator, doc}).second)
@ -141,7 +141,7 @@ FunctionFactory & FunctionFactory::instance()
return ret;
}
Documentation FunctionFactory::getDocumentation(const std::string & name) const
FunctionDocumentation FunctionFactory::getDocumentation(const std::string & name) const
{
auto it = functions.find(name);
if (it == functions.end())

View File

@ -3,7 +3,7 @@
#include <Interpreters/Context_fwd.h>
#include <Common/register_objects.h>
#include <Common/IFactoryWithAliases.h>
#include <Common/Documentation.h>
#include <Common/FunctionDocumentation.h>
#include <Functions/IFunction.h>
#include <Functions/IFunctionAdaptors.h>
@ -17,7 +17,7 @@ namespace DB
{
using FunctionCreator = std::function<FunctionOverloadResolverPtr(ContextPtr)>;
using FunctionFactoryData = std::pair<FunctionCreator, Documentation>;
using FunctionFactoryData = std::pair<FunctionCreator, FunctionDocumentation>;
/** Creates function by name.
* Function could use for initialization (take ownership of shared_ptr, for example)
@ -29,13 +29,13 @@ public:
static FunctionFactory & instance();
template <typename Function>
void registerFunction(Documentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive)
void registerFunction(FunctionDocumentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive)
{
registerFunction<Function>(Function::name, std::move(doc), case_sensitiveness);
}
template <typename Function>
void registerFunction(const std::string & name, Documentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive)
void registerFunction(const std::string & name, FunctionDocumentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive)
{
if constexpr (std::is_base_of_v<IFunction, Function>)
registerFunction(name, &adaptFunctionToOverloadResolver<Function>, std::move(doc), case_sensitiveness);
@ -63,10 +63,10 @@ public:
void registerFunction(
const std::string & name,
FunctionCreator creator,
Documentation doc = {},
FunctionDocumentation doc = {},
CaseSensitiveness case_sensitiveness = CaseSensitive);
Documentation getDocumentation(const std::string & name) const;
FunctionDocumentation getDocumentation(const std::string & name) const;
private:
using Functions = std::unordered_map<std::string, Value>;
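The signature changes above are the header side of a mechanical rename from Documentation to FunctionDocumentation. The call sites throughout this diff also move from positional aggregate initialization to C++20 designated initializers, which makes each registration self-describing and lets unused fields be omitted. A sketch with a hypothetical cut-down aggregate:

#include <string>
#include <vector>

// Hypothetical cut-down aggregate mirroring the shape used in this diff.
struct FunctionDocumentationSketch
{
    std::string description;
    std::vector<std::string> examples;
    std::vector<std::string> categories;
};

int main()
{
    // Positional aggregate init (old style): the reader must remember the
    // field order, and a middle field cannot be skipped.
    FunctionDocumentationSketch old_style{"Returns X.", {"SELECT x()"}, {"Misc"}};

    // Designated initializers (new style): self-describing, and fields such
    // as examples can simply be left out.
    FunctionDocumentationSketch new_style{
        .description = "Returns X.",
        .categories = {"Misc"},
    };

    return old_style.description == new_style.description ? 0 : 1;
}

Designated initializers must follow the declaration order of the fields, which is why the converted call sites always write .description before .examples and .categories.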

View File

@ -8,14 +8,14 @@ namespace DB
REGISTER_FUNCTION(ToDecimalString)
{
factory.registerFunction<FunctionToDecimalString>(
{
R"(
FunctionDocumentation{
.description=R"(
Returns a string representation of a number. The first argument is a number of any numeric type,
the second argument is the desired number of digits in the fractional part. Returns String.
)",
Documentation::Examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)"}},
Documentation::Categories{"String"}
.examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)", ""}},
.categories{"String"}
}, FunctionFactory::CaseInsensitive);
}

View File

@ -169,17 +169,17 @@ public:
REGISTER_FUNCTION(ULIDStringToDateTime)
{
factory.registerFunction<FunctionULIDStringToDateTime>(
factory.registerFunction<FunctionULIDStringToDateTime>(FunctionDocumentation
{
R"(
.description=R"(
This function extracts the timestamp from a ULID and returns it as a DateTime64(3) typed value.
The function expects the ULID to be provided as the first argument, which can be either a String or a FixedString(26) data type.
An optional second argument can be passed to specify a timezone for the timestamp.
)",
Documentation::Examples{
{"ulid", "SELECT ULIDStringToDateTime(generateULID())"},
{"timezone", "SELECT ULIDStringToDateTime(generateULID(), 'Asia/Istanbul')"}},
Documentation::Categories{"ULID"}
.examples{
{"ulid", "SELECT ULIDStringToDateTime(generateULID())", ""},
{"timezone", "SELECT ULIDStringToDateTime(generateULID(), 'Asia/Istanbul')", ""}},
.categories{"ULID"}
},
FunctionFactory::CaseSensitive);
}

View File

@ -47,69 +47,69 @@ Returned value: value of the dictionary attribute parsed in the attributes data type.
Throws an exception if it cannot parse the value of the attribute or the value does not match the attribute data type.
)" };
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::get>>(Documentation{ fmt::format(dict_get_description, "attributes data type") });
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::getOrDefault>>(Documentation{ fmt::format(dict_get_or_default_description, "attributes data type") });
factory.registerFunction<FunctionDictGetOrNull>(Documentation{ dict_get_or_null_description });
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::get>>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "attributes data type") });
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::getOrDefault>>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "attributes data type") });
factory.registerFunction<FunctionDictGetOrNull>(FunctionDocumentation{ .description=dict_get_or_null_description });
factory.registerFunction<FunctionDictGetUInt8>(Documentation{ fmt::format(dict_get_description, "UInt8") });
factory.registerFunction<FunctionDictGetUInt16>(Documentation{ fmt::format(dict_get_description, "UInt16") });
factory.registerFunction<FunctionDictGetUInt32>(Documentation{ fmt::format(dict_get_description, "UInt32") });
factory.registerFunction<FunctionDictGetUInt64>(Documentation{ fmt::format(dict_get_description, "UInt64") });
factory.registerFunction<FunctionDictGetInt8>(Documentation{ fmt::format(dict_get_description, "Int8") });
factory.registerFunction<FunctionDictGetInt16>(Documentation{ fmt::format(dict_get_description, "Int16") });
factory.registerFunction<FunctionDictGetInt32>(Documentation{ fmt::format(dict_get_description, "Int32") });
factory.registerFunction<FunctionDictGetInt64>(Documentation{ fmt::format(dict_get_description, "Int64") });
factory.registerFunction<FunctionDictGetFloat32>(Documentation{ fmt::format(dict_get_description, "Float32") });
factory.registerFunction<FunctionDictGetFloat64>(Documentation{ fmt::format(dict_get_description, "Float64") });
factory.registerFunction<FunctionDictGetDate>(Documentation{ fmt::format(dict_get_description, "Date") });
factory.registerFunction<FunctionDictGetDateTime>(Documentation{ fmt::format(dict_get_description, "DateTime") });
factory.registerFunction<FunctionDictGetUUID>(Documentation{ fmt::format(dict_get_description, "UUID") });
factory.registerFunction<FunctionDictGetIPv4>(Documentation{ fmt::format(dict_get_description, "IPv4") });
factory.registerFunction<FunctionDictGetIPv6>(Documentation{ fmt::format(dict_get_description, "IPv6") });
factory.registerFunction<FunctionDictGetString>(Documentation{ fmt::format(dict_get_description, "String") });
factory.registerFunction<FunctionDictGetUInt8>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "UInt8") });
factory.registerFunction<FunctionDictGetUInt16>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "UInt16") });
factory.registerFunction<FunctionDictGetUInt32>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "UInt32") });
factory.registerFunction<FunctionDictGetUInt64>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "UInt64") });
factory.registerFunction<FunctionDictGetInt8>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Int8") });
factory.registerFunction<FunctionDictGetInt16>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Int16") });
factory.registerFunction<FunctionDictGetInt32>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Int32") });
factory.registerFunction<FunctionDictGetInt64>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Int64") });
factory.registerFunction<FunctionDictGetFloat32>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Float32") });
factory.registerFunction<FunctionDictGetFloat64>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Float64") });
factory.registerFunction<FunctionDictGetDate>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Date") });
factory.registerFunction<FunctionDictGetDateTime>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "DateTime") });
factory.registerFunction<FunctionDictGetUUID>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "UUID") });
factory.registerFunction<FunctionDictGetIPv4>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "IPv4") });
factory.registerFunction<FunctionDictGetIPv6>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "IPv6") });
factory.registerFunction<FunctionDictGetString>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "String") });
factory.registerFunction<FunctionDictGetUInt8OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "UInt8") });
factory.registerFunction<FunctionDictGetUInt16OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "UInt16") });
factory.registerFunction<FunctionDictGetUInt32OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "UInt32") });
factory.registerFunction<FunctionDictGetUInt64OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "UInt64") });
factory.registerFunction<FunctionDictGetInt8OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Int8") });
factory.registerFunction<FunctionDictGetInt16OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Int16") });
factory.registerFunction<FunctionDictGetInt32OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Int32") });
factory.registerFunction<FunctionDictGetInt64OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Int64") });
factory.registerFunction<FunctionDictGetFloat32OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Float32") });
factory.registerFunction<FunctionDictGetFloat64OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Float64") });
factory.registerFunction<FunctionDictGetDateOrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Date") });
factory.registerFunction<FunctionDictGetDateTimeOrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "DateTime") });
factory.registerFunction<FunctionDictGetUUIDOrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "UUID") });
factory.registerFunction<FunctionDictGetIPv4OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "IPv4") });
factory.registerFunction<FunctionDictGetIPv6OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "IPv6") });
factory.registerFunction<FunctionDictGetStringOrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "String") });
factory.registerFunction<FunctionDictGetUInt8OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "UInt8") });
factory.registerFunction<FunctionDictGetUInt16OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "UInt16") });
factory.registerFunction<FunctionDictGetUInt32OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "UInt32") });
factory.registerFunction<FunctionDictGetUInt64OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "UInt64") });
factory.registerFunction<FunctionDictGetInt8OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Int8") });
factory.registerFunction<FunctionDictGetInt16OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Int16") });
factory.registerFunction<FunctionDictGetInt32OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Int32") });
factory.registerFunction<FunctionDictGetInt64OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Int64") });
factory.registerFunction<FunctionDictGetFloat32OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Float32") });
factory.registerFunction<FunctionDictGetFloat64OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Float64") });
factory.registerFunction<FunctionDictGetDateOrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Date") });
factory.registerFunction<FunctionDictGetDateTimeOrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "DateTime") });
factory.registerFunction<FunctionDictGetUUIDOrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "UUID") });
factory.registerFunction<FunctionDictGetIPv4OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "IPv4") });
factory.registerFunction<FunctionDictGetIPv6OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "IPv6") });
factory.registerFunction<FunctionDictGetStringOrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "String") });
factory.registerFunction<FunctionDictHas>(Documentation{ R"(
factory.registerFunction<FunctionDictHas>(FunctionDocumentation{ .description=R"(
Checks whether a key is present in a dictionary.
Accepts 2 parameters: name of the dictionary, key value - expression returning dictionary key-type value or tuple-type value - depending on the dictionary configuration.
Returned value: 0 if there is no key, 1 if there is a key, type of UInt8
)"});
factory.registerFunction<FunctionDictGetHierarchy>(Documentation{ R"(
factory.registerFunction<FunctionDictGetHierarchy>(FunctionDocumentation{ .description=R"(
Creates an array, containing all the parents of a key in the hierarchical dictionary.
Accepts 2 parameters: name of the dictionary, key value - expression returning a UInt64-type value.
Returned value: parents for the key, type of Array(UInt64)
)"});
factory.registerFunction<FunctionDictIsIn>(Documentation{ R"(
factory.registerFunction<FunctionDictIsIn>(FunctionDocumentation{ .description=R"(
Checks the ancestor of a key through the whole hierarchical chain in the dictionary.
Accepts 3 parameters: name of the dictionary, key to be checked - expression returning a UInt64-type value, alleged ancestor of the key - expression returning a UInt64-type value.
Returned value: 0 if key is not a child of the ancestor, 1 if key is a child of the ancestor or if key is the ancestor, type of UInt8
)"});
factory.registerFunction<FunctionDictGetChildrenOverloadResolver>(Documentation{ R"(
factory.registerFunction<FunctionDictGetChildrenOverloadResolver>(FunctionDocumentation{ .description=R"(
Returns first-level children as an array of indexes. It is the inverse transformation for dictGetHierarchy.
Accepts 2 parameters: name of the dictionary, key value - expression returning a UInt64-type value.
Returned value: first-level descendants for the key, type of Array(UInt64)
)"});
factory.registerFunction<FunctionDictGetDescendantsOverloadResolver>(Documentation{ R"(
factory.registerFunction<FunctionDictGetDescendantsOverloadResolver>(FunctionDocumentation{ .description=R"(
Returns all descendants as if the dictGetChildren function were applied `level` times recursively.
Accepts 3 parameters: name of the dictionary, key value (expression returning a UInt64-type value), and hierarchy level (UInt8); if level = 0, returns all descendants to the end.
Returned value: descendants for the key, type of Array(UInt64)

View File

@ -15,15 +15,15 @@ REGISTER_FUNCTION(Hashing)
factory.registerFunction<FunctionSipHash64Keyed>();
factory.registerFunction<FunctionSipHash128>();
factory.registerFunction<FunctionSipHash128Keyed>();
factory.registerFunction<FunctionSipHash128Reference>({
"Like [sipHash128](#hash_functions-siphash128) but implements the 128-bit algorithm from the original authors of SipHash.",
Documentation::Examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))"}},
Documentation::Categories{"Hash"}
factory.registerFunction<FunctionSipHash128Reference>(FunctionDocumentation{
.description="Like [sipHash128](#hash_functions-siphash128) but implements the 128-bit algorithm from the original authors of SipHash.",
.examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))", ""}},
.categories{"Hash"}
});
factory.registerFunction<FunctionSipHash128ReferenceKeyed>({
"Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument instead of using a fixed key.",
Documentation::Examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));"}},
Documentation::Categories{"Hash"}
factory.registerFunction<FunctionSipHash128ReferenceKeyed>(FunctionDocumentation{
.description="Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument instead of using a fixed key.",
.examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));", ""}},
.categories{"Hash"}
});
factory.registerFunction<FunctionCityHash64>();
factory.registerFunction<FunctionFarmFingerprint64>();
@ -37,10 +37,10 @@ REGISTER_FUNCTION(Hashing)
factory.registerFunction<FunctionXxHash32>();
factory.registerFunction<FunctionXxHash64>();
factory.registerFunction<FunctionXXH3>(
{
"Calculates value of XXH3 64-bit hash function. Refer to https://github.com/Cyan4973/xxHash for detailed documentation.",
Documentation::Examples{{"hash", "SELECT xxh3('ClickHouse')"}},
Documentation::Categories{"Hash"}
FunctionDocumentation{
.description="Calculates value of XXH3 64-bit hash function. Refer to https://github.com/Cyan4973/xxHash for detailed documentation.",
.examples{{"hash", "SELECT xxh3('ClickHouse')", ""}},
.categories{"Hash"}
},
FunctionFactory::CaseSensitive);
@ -48,16 +48,16 @@ REGISTER_FUNCTION(Hashing)
factory.registerFunction<FunctionBLAKE3>(
{
R"(
FunctionDocumentation{
.description=R"(
Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString.
This cryptographic hash function is integrated into ClickHouse using the BLAKE3 Rust library.
The function is rather fast: roughly twice as fast as SHA-2, while generating hashes of the same length as SHA-256.
It returns a BLAKE3 hash as a byte array with type FixedString(32).
)",
Documentation::Examples{
{"hash", "SELECT hex(BLAKE3('ABC'))"}},
Documentation::Categories{"Hash"}
.examples{
{"hash", "SELECT hex(BLAKE3('ABC'))", ""}},
.categories{"Hash"}
},
FunctionFactory::CaseSensitive);
}

View File

@ -108,6 +108,10 @@ struct NgramDistanceImpl
if constexpr (case_insensitive)
{
#if defined(MEMORY_SANITIZER)
/// Due to PODArray padding, accessing more elements should be OK
__msan_unpoison(code_points + (N - 1), padding_offset * sizeof(CodePoint));
#endif
/// We really need template lambdas with C++20 to do it inline
unrollLowering<N - 1>(code_points, std::make_index_sequence<padding_offset>());
}
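The __msan_unpoison call added above tells MemorySanitizer that reading into PODArray's padding is deliberate: the access stays in-bounds because the container over-allocates, but the padding bytes are uninitialized, so MSan would otherwise report a false positive. A minimal sketch of the idiom, compilable with or without MSan; all names and sizes here are illustrative:

#include <cstddef>
#include <cstdio>
#include <cstring>

#if defined(__has_feature)
#    if __has_feature(memory_sanitizer)
#        include <sanitizer/msan_interface.h>
#        define SKETCH_HAS_MSAN 1
#    endif
#endif

int main()
{
    constexpr size_t payload = 16, padding = 8;
    unsigned char buf[payload + padding];
    std::memset(buf, 1, payload); // only the payload is initialized

#ifdef SKETCH_HAS_MSAN
    // Without this, touching buf[16..23] below would be reported as a
    // use-of-uninitialized-value even though the access is in-bounds.
    __msan_unpoison(buf + payload, padding);
#endif

    unsigned sum = 0;
    for (size_t i = 0; i < payload + padding; ++i) // intentionally reads the padding
        sum += buf[i];
    std::printf("sum over payload plus padding (padding bytes are indeterminate): %u\n", sum);
}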

View File

@ -100,8 +100,8 @@ namespace
REGISTER_FUNCTION(JSONArrayLength)
{
factory.registerFunction<FunctionJSONArrayLength>(Documentation{
"Returns the number of elements in the outermost JSON array. The function returns NULL if input JSON string is invalid."});
factory.registerFunction<FunctionJSONArrayLength>(FunctionDocumentation{
.description="Returns the number of elements in the outermost JSON array. The function returns NULL if input JSON string is invalid."});
/// For Spark compatibility.
factory.registerAlias("JSON_ARRAY_LENGTH", "JSONArrayLength", FunctionFactory::CaseInsensitive);

View File

@ -44,32 +44,32 @@ using FunctionCutToFirstSignificantSubdomainWithWWWRFC = FunctionStringToString<
REGISTER_FUNCTION(CutToFirstSignificantSubdomain)
{
factory.registerFunction<FunctionCutToFirstSignificantSubdomain>(
{
R"(Returns the part of the domain that includes top-level subdomains up to the "first significant subdomain" (see documentation of the `firstSignificantSubdomain`).)",
Documentation::Examples{
{"cutToFirstSignificantSubdomain1", "SELECT cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/')"},
{"cutToFirstSignificantSubdomain2", "SELECT cutToFirstSignificantSubdomain('www.tr')"},
{"cutToFirstSignificantSubdomain3", "SELECT cutToFirstSignificantSubdomain('tr')"},
FunctionDocumentation{
.description=R"(Returns the part of the domain that includes top-level subdomains up to the "first significant subdomain" (see documentation of the `firstSignificantSubdomain`).)",
.examples{
{"cutToFirstSignificantSubdomain1", "SELECT cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/')", ""},
{"cutToFirstSignificantSubdomain2", "SELECT cutToFirstSignificantSubdomain('www.tr')", ""},
{"cutToFirstSignificantSubdomain3", "SELECT cutToFirstSignificantSubdomain('tr')", ""},
},
Documentation::Categories{"URL"}
.categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainWithWWW>(
{
R"(Returns the part of the domain that includes top-level subdomains up to the "first significant subdomain", without stripping "www".)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Returns the part of the domain that includes top-level subdomains up to the "first significant subdomain", without stripping "www".)",
.examples{},
.categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainRFC>(
{
R"(Similar to `cutToFirstSignificantSubdomain` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Similar to `cutToFirstSignificantSubdomain` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
.examples{},
.categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainWithWWWRFC>(
{
R"(Similar to `cutToFirstSignificantSubdomainWithWWW` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Similar to `cutToFirstSignificantSubdomainWithWWW` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
.examples{},
.categories{"URL"}
});
}

View File

@ -43,39 +43,39 @@ using FunctionCutToFirstSignificantSubdomainCustomWithWWWRFC = FunctionCutToFirs
REGISTER_FUNCTION(CutToFirstSignificantSubdomainCustom)
{
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustom>(
{
R"(
FunctionDocumentation{
.description=R"(
Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. Accepts custom TLD list name.
Can be useful if you need a fresh TLD list or have a custom one.
)",
Documentation::Examples{
{"cutToFirstSignificantSubdomainCustom", "SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');"},
.examples{
{"cutToFirstSignificantSubdomainCustom", "SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');", ""},
},
Documentation::Categories{"URL"}
.categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustomWithWWW>(
{
R"(
FunctionDocumentation{
.description=R"(
Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`.
Accepts custom TLD list name from config.
Can be useful if you need a fresh TLD list or have a custom one.
)",
Documentation::Examples{{"cutToFirstSignificantSubdomainCustomWithWWW", "SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')"}},
Documentation::Categories{"URL"}
.examples{{"cutToFirstSignificantSubdomainCustomWithWWW", "SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')", ""}},
.categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustomRFC>(
{
R"(Similar to `cutToFirstSignificantSubdomainCustom` but follows stricter rules according to RFC 3986.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Similar to `cutToFirstSignificantSubdomainCustom` but follows stricter rules according to RFC 3986.)",
.examples{},
.categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustomWithWWWRFC>(
{
R"(Similar to `cutToFirstSignificantSubdomainCustomWithWWW` but follows stricter rules according to RFC 3986.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Similar to `cutToFirstSignificantSubdomainCustomWithWWW` but follows stricter rules according to RFC 3986.)",
.examples{},
.categories{"URL"}
});
}

View File

@ -14,23 +14,23 @@ using FunctionDomainRFC = FunctionStringToString<ExtractSubstringImpl<ExtractDom
REGISTER_FUNCTION(Domain)
{
factory.registerFunction<FunctionDomain>(
factory.registerFunction<FunctionDomain>(FunctionDocumentation
{
R"(
.description=R"(
Extracts the hostname from a URL.
The URL can be specified with or without a scheme.
If the argument can't be parsed as a URL, the function returns an empty string.
)",
Documentation::Examples{{"domain", "SELECT domain('svn+ssh://some.svn-hosting.com:80/repo/trunk')"}},
Documentation::Categories{"URL"}
.examples{{"domain", "SELECT domain('svn+ssh://some.svn-hosting.com:80/repo/trunk')", ""}},
.categories{"URL"}
});
factory.registerFunction<FunctionDomainRFC>(
factory.registerFunction<FunctionDomainRFC>(FunctionDocumentation
{
R"(Similar to `domain` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
.description=R"(Similar to `domain` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
.examples{},
.categories{"URL"}
});
}

View File

@ -15,21 +15,21 @@ using FunctionDomainWithoutWWWRFC = FunctionStringToString<ExtractSubstringImpl<
REGISTER_FUNCTION(DomainWithoutWWW)
{
factory.registerFunction<FunctionDomainWithoutWWW>(
{
R"(
FunctionDocumentation{
.description=R"(
Extracts the hostname from a URL, removing the leading "www." if present.
The URL can be specified with or without a scheme.
If the argument can't be parsed as a URL, the function returns an empty string.
)",
Documentation::Examples{{"domainWithoutWWW", "SELECT domainWithoutWWW('https://www.clickhouse.com')"}},
Documentation::Categories{"URL"}
.examples{{"domainWithoutWWW", "SELECT domainWithoutWWW('https://www.clickhouse.com')", ""}},
.categories{"URL"}
});
factory.registerFunction<FunctionDomainWithoutWWWRFC>(
{
R"(Similar to `domainWithoutWWW` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Similar to `domainWithoutWWW` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
.examples{},
.categories{"URL"}
});
}

View File

@ -15,8 +15,8 @@ using FunctionFirstSignificantSubdomainRFC = FunctionStringToString<ExtractSubst
REGISTER_FUNCTION(FirstSignificantSubdomain)
{
factory.registerFunction<FunctionFirstSignificantSubdomain>(
{
R"(
FunctionDocumentation{
.description=R"(
Returns the "first significant subdomain".
The first significant subdomain is a second-level domain if it is 'com', 'net', 'org', or 'co'.
@ -26,15 +26,15 @@ For example, firstSignificantSubdomain('https://news.clickhouse.com/') = 'clickhouse'.
The list of "insignificant" second-level domains and other implementation details may change in the future.
)",
Documentation::Examples{{"firstSignificantSubdomain", "SELECT firstSignificantSubdomain('https://news.clickhouse.com/')"}},
Documentation::Categories{"URL"}
.examples{{"firstSignificantSubdomain", "SELECT firstSignificantSubdomain('https://news.clickhouse.com/')", ""}},
.categories{"URL"}
});
factory.registerFunction<FunctionFirstSignificantSubdomainRFC>(
{
R"(Returns the "first significant subdomain" according to RFC 1034.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Returns the "first significant subdomain" according to RFC 1034.)",
.examples{},
.categories{"URL"}
});
}

View File

@ -138,17 +138,15 @@ struct FunctionPortRFC : public FunctionPortImpl<true>
REGISTER_FUNCTION(Port)
{
factory.registerFunction<FunctionPort>(
factory.registerFunction<FunctionPort>(FunctionDocumentation
{
R"(Returns the port or `default_port` if there is no port in the URL (or in case of validation error).)",
Documentation::Examples{},
Documentation::Categories{"URL"}
.description=R"(Returns the port or `default_port` if there is no port in the URL (or in case of validation error).)",
.categories{"URL"}
});
factory.registerFunction<FunctionPortRFC>(
factory.registerFunction<FunctionPortRFC>(FunctionDocumentation
{
R"(Similar to `port`, but conforms to RFC 3986.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
.description=R"(Similar to `port`, but conforms to RFC 3986.)",
.categories{"URL"}
});
}

View File

@ -53,22 +53,22 @@ using FunctionTopLevelDomainRFC = FunctionStringToString<ExtractSubstringImpl<Ex
REGISTER_FUNCTION(TopLevelDomain)
{
factory.registerFunction<FunctionTopLevelDomain>(
factory.registerFunction<FunctionTopLevelDomain>(FunctionDocumentation
{
R"(
.description=R"(
Extracts the top-level domain from a URL.
Returns an empty string if the argument cannot be parsed as a URL or does not contain a top-level domain.
)",
Documentation::Examples{{"topLevelDomain", "SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk')"}},
Documentation::Categories{"URL"}
.examples{{"topLevelDomain", "SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk')", ""}},
.categories{"URL"}
});
factory.registerFunction<FunctionTopLevelDomainRFC>(
factory.registerFunction<FunctionTopLevelDomainRFC>(FunctionDocumentation
{
R"(Similar to topLevelDomain, but conforms to RFC 3986.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
.description=R"(Similar to topLevelDomain, but conforms to RFC 3986.)",
.examples{},
.categories{"URL"}
});
}

View File

@ -107,17 +107,17 @@ public:
/// UTC_timestamp for MySQL interface support
REGISTER_FUNCTION(UTCTimestamp)
{
factory.registerFunction<UTCTimestampOverloadResolver>({
R"(
factory.registerFunction<UTCTimestampOverloadResolver>(FunctionDocumentation{
.description=R"(
Returns the current date and time at the moment of query analysis. The function is a constant expression.
Same as `now('UTC')`. It was added only for MySQL support; `now` is preferred.
Example:
[example:typical]
)",
Documentation::Examples{
{"typical", "SELECT UTCTimestamp();"}},
Documentation::Categories{"Dates and Times"}}, FunctionFactory::CaseInsensitive);
.examples{
{"typical", "SELECT UTCTimestamp();", ""}},
.categories{"Dates and Times"}}, FunctionFactory::CaseInsensitive);
factory.registerAlias("UTC_timestamp", UTCTimestampOverloadResolver::name, FunctionFactory::CaseInsensitive);
}

View File

@ -10,8 +10,7 @@ namespace DB
REGISTER_FUNCTION(UniqTheta)
{
factory.registerFunction<FunctionUniqThetaIntersect>(
{
R"(
FunctionDocumentation{.description = R"(
Two uniqThetaSketch objects to do intersect calculation (set operation ∩), the result is a new uniqThetaSketch.
A uniqThetaSketch object is to be constructed by aggregation function uniqTheta with -State.
@ -22,14 +21,13 @@ For more information on RoaringBitmap, see: [Theta Sketch Framework](https://dat
Typical usage:
[example:typical]
)",
Documentation::Examples{
{"typical", "select finalizeAggregation(uniqThetaIntersect(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));"}},
Documentation::Categories{"uniqTheta"}
.examples{
{"typical", "select finalizeAggregation(uniqThetaIntersect(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));", ""}},
.categories{"uniqTheta"}
});
factory.registerFunction<FunctionUniqThetaUnion>(
{
R"(
FunctionDocumentation{.description = R"(
Two uniqThetaSketch objects to do union calculation (set operation ∪), the result is a new uniqThetaSketch.
A uniqThetaSketch object is to be constructed by aggregation function uniqTheta with -State.
@ -40,13 +38,12 @@ For more information on RoaringBitmap, see: [Theta Sketch Framework](https://dat
Typical usage:
[example:typical]
)",
Documentation::Examples{
{"typical", "select finalizeAggregation(uniqThetaUnion(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));"}},
Documentation::Categories{"uniqTheta"}
.examples{
{"typical", "select finalizeAggregation(uniqThetaUnion(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));", ""}},
.categories{"uniqTheta"}
});
factory.registerFunction<FunctionUniqThetaNot>(
{
R"(
FunctionDocumentation{.description = R"(
Two uniqThetaSketch objects to do a_not_b calculation (set operation ×), the result is a new uniqThetaSketch.
A uniqThetaSketch object is to be constructed by aggregation function uniqTheta with -State.
@ -57,9 +54,9 @@ For more information on RoaringBitmap, see: [Theta Sketch Framework](https://dat
Typical usage:
[example:typical]
)",
Documentation::Examples{
{"typical", "select finalizeAggregation(uniqThetaNot(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));"}},
Documentation::Categories{"uniqTheta"}
.examples{
{"typical", "select finalizeAggregation(uniqThetaNot(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));", ""}},
.categories{"uniqTheta"}
});
}

View File

@ -135,7 +135,7 @@ public:
size_t num_function_arguments = function_type->getArgumentTypes().size();
if (is_single_array_argument
&& tuple_argument_size
&& tuple_argument_size > 1
&& tuple_argument_size == num_function_arguments)
{
assert(nested_types.size() == 1);
@ -337,12 +337,13 @@ public:
}
const auto * column_tuple = checkAndGetColumn<ColumnTuple>(&column_array->getData());
if (is_single_array_argument && column_tuple && column_tuple->getColumns().size() == num_function_arguments)
size_t tuple_size = column_tuple ? column_tuple->getColumns().size() : 0;
if (is_single_array_argument && tuple_size > 1 && tuple_size == num_function_arguments)
{
const auto & type_tuple = assert_cast<const DataTypeTuple &>(*array_type->getNestedType());
const auto & tuple_names = type_tuple.getElementNames();
size_t tuple_size = column_tuple->getColumns().size();
arrays.reserve(column_tuple->getColumns().size());
for (size_t j = 0; j < tuple_size; ++j)
{

View File

@ -363,101 +363,101 @@ using FunctionMapPartialReverseSort = FunctionMapToArrayAdapter<FunctionArrayPar
REGISTER_FUNCTION(MapMiscellaneous)
{
factory.registerFunction<FunctionMapConcat>(
{
"The same as arrayConcat.",
Documentation::Examples{{"mapConcat", "SELECT mapConcat(map('k1', 'v1'), map('k2', 'v2'))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayConcat.",
.examples{{"mapConcat", "SELECT mapConcat(map('k1', 'v1'), map('k2', 'v2'))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapKeys>(
{
"Returns an array with the keys of map.",
Documentation::Examples{{"mapKeys", "SELECT mapKeys(map('k1', 'v1', 'k2', 'v2'))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="Returns an array with the keys of map.",
.examples{{"mapKeys", "SELECT mapKeys(map('k1', 'v1', 'k2', 'v2'))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapValues>(
{
"Returns an array with the values of map.",
Documentation::Examples{{"mapValues", "SELECT mapValues(map('k1', 'v1', 'k2', 'v2'))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="Returns an array with the values of map.",
.examples{{"mapValues", "SELECT mapValues(map('k1', 'v1', 'k2', 'v2'))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapContains>(
{
"Checks whether the map has the specified key.",
Documentation::Examples{{"mapContains", "SELECT mapContains(map('k1', 'v1', 'k2', 'v2'), 'k1')"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="Checks whether the map has the specified key.",
.examples{{"mapContains", "SELECT mapContains(map('k1', 'v1', 'k2', 'v2'), 'k1')", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapFilter>(
{
"The same as arrayFilter.",
Documentation::Examples{{"mapFilter", "SELECT mapFilter((k, v) -> v > 1, map('k1', 1, 'k2', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayFilter.",
.examples{{"mapFilter", "SELECT mapFilter((k, v) -> v > 1, map('k1', 1, 'k2', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapApply>(
{
"The same as arrayMap.",
Documentation::Examples{{"mapApply", "SELECT mapApply((k, v) -> (k, v * 2), map('k1', 1, 'k2', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayMap.",
.examples{{"mapApply", "SELECT mapApply((k, v) -> (k, v * 2), map('k1', 1, 'k2', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapExists>(
{
"The same as arrayExists.",
Documentation::Examples{{"mapExists", "SELECT mapExists((k, v) -> v = 1, map('k1', 1, 'k2', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayExists.",
.examples{{"mapExists", "SELECT mapExists((k, v) -> v = 1, map('k1', 1, 'k2', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapAll>(
{
"The same as arrayAll.",
Documentation::Examples{{"mapAll", "SELECT mapAll((k, v) -> v = 1, map('k1', 1, 'k2', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayAll.",
.examples{{"mapAll", "SELECT mapAll((k, v) -> v = 1, map('k1', 1, 'k2', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapSort>(
{
"The same as arraySort.",
Documentation::Examples{{"mapSort", "SELECT mapSort((k, v) -> v, map('k1', 3, 'k2', 1, 'k3', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arraySort.",
.examples{{"mapSort", "SELECT mapSort((k, v) -> v, map('k1', 3, 'k2', 1, 'k3', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapReverseSort>(
{
"The same as arrayReverseSort.",
Documentation::Examples{{"mapReverseSort", "SELECT mapReverseSort((k, v) -> v, map('k1', 3, 'k2', 1, 'k3', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayReverseSort.",
.examples{{"mapReverseSort", "SELECT mapReverseSort((k, v) -> v, map('k1', 3, 'k2', 1, 'k3', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapPartialSort>(
{
"The same as arrayReverseSort.",
Documentation::Examples{{"mapPartialSort", "SELECT mapPartialSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayReverseSort.",
.examples{{"mapPartialSort", "SELECT mapPartialSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapPartialReverseSort>(
{
"The same as arrayPartialReverseSort.",
Documentation::Examples{{"mapPartialReverseSort", "SELECT mapPartialReverseSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayPartialReverseSort.",
.examples{{"mapPartialReverseSort", "SELECT mapPartialReverseSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapContainsKeyLike>(
{
"Checks whether map contains key LIKE specified pattern.",
Documentation::Examples{{"mapContainsKeyLike", "SELECT mapContainsKeyLike(map('k1-1', 1, 'k2-1', 2), 'k1%')"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="Checks whether map contains key LIKE specified pattern.",
.examples{{"mapContainsKeyLike", "SELECT mapContainsKeyLike(map('k1-1', 1, 'k2-1', 2), 'k1%')", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapExtractKeyLike>(
{
"Returns a map with elements which key matches the specified pattern.",
Documentation::Examples{{"mapExtractKeyLike", "SELECT mapExtractKeyLike(map('k1-1', 1, 'k2-1', 2), 'k1%')"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="Returns a map with elements which key matches the specified pattern.",
.examples{{"mapExtractKeyLike", "SELECT mapExtractKeyLike(map('k1-1', 1, 'k2-1', 2), 'k1%')", ""}},
.categories{"Map"},
});
}

Some files were not shown because too many files have changed in this diff.