Merge branch 'master' into iaadeflate_swpath_compat

This commit is contained in:
Robert Schulze 2023-05-12 16:39:17 +02:00 committed by GitHub
commit 9d34233e65
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
282 changed files with 4414 additions and 2370 deletions

View File

@ -111,6 +111,7 @@ Checks: '*,
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-confusable-identifiers, # useful but slooow
-misc-use-anonymous-namespace,
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,

View File

@ -125,8 +125,8 @@ jobs:
SONAR_SCANNER_VERSION: 4.8.0.2856
SONAR_SERVER_URL: "https://sonarcloud.io"
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
CC: clang-15
CXX: clang++-15
CC: clang-16
CXX: clang++-16
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1

8
.gitmodules vendored
View File

@ -267,7 +267,10 @@
url = https://github.com/ClickHouse/nats.c
[submodule "contrib/vectorscan"]
path = contrib/vectorscan
url = https://github.com/VectorCamp/vectorscan
# FIXME: update once upstream fixes will be merged:
# - https://github.com/VectorCamp/vectorscan/pull/148
# - https://github.com/VectorCamp/vectorscan/pull/149
url = https://github.com/azat-ch/vectorscan
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/ClickHouse/c-ares
@ -338,6 +341,9 @@
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing
[submodule "contrib/libfiu"]
path = contrib/libfiu
url = https://github.com/ClickHouse/libfiu.git
[submodule "contrib/isa-l"]
path = contrib/isa-l
url = https://github.com/ClickHouse/isa-l.git

View File

@ -342,13 +342,6 @@ if (COMPILER_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
# Set new experimental pass manager, it's a performance, build time and binary size win.
# Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
endif ()
# We cannot afford to use LTO when compiling unit tests, and it's not enough
# to only supply -fno-lto at the final linking stage. So we disable it
# completely.
@ -395,6 +388,8 @@ if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG"))
set(ENABLE_GWP_ASAN OFF)
endif ()
option (ENABLE_FIU "Enable Fiu" ON)
option(WERROR "Enable -Werror compiler option" ON)
if (WERROR)

View File

@ -21,11 +21,17 @@ curl https://clickhouse.com/ | sh
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
## Upcoming Events
* [**ClickHouse Spring Meetup in Manhattan**](https://www.meetup.com/clickhouse-new-york-user-group/events/292517734) - April 26 - It's spring, and it's time to meet again in the city! Talks include: "Building a domain specific query language on top of Clickhouse", "A Galaxy of Information", "Our Journey to ClickHouse Cloud from Redshift", and a ClickHouse update!
* [**v23.4 Release Webinar**](https://clickhouse.com/company/events/v23-4-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-04) - April 26 - 23.4 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16 - Save the date! ClickHouse is coming back to Berlin. We're excited to announce an upcoming ClickHouse Meetup that you won't want to miss. Join us as we gather together to discuss the latest in the world of ClickHouse and share user stories.
* [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - May 31 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16
* [**ClickHouse Meetup in Barcelona**](https://www.meetup.com/clickhouse-barcelona-user-group/events/292892669) - May 25
* [**ClickHouse Meetup in London**](https://www.meetup.com/clickhouse-london-user-group/events/292892824) - May 25
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/293426725/) - Jun 7
* [**ClickHouse Meetup in Stockholm**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - Jun 13
Also, keep an eye out for upcoming meetups in Amsterdam, Boston, NYC, Beijing, and Toronto. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.
## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
* **Recording available**: [**v23.3 Release Webinar**](https://www.youtube.com/watch?v=ISaGUjvBNao) UNDROP TABLE, server settings introspection, nested dynamic disks, MySQL compatibility, parseDateTime, Lightweight Deletes, Parallel Replicas, integrations updates, and so much more! Watch it now!
* **Recording available**: [**v23.4 Release Webinar**](https://www.youtube.com/watch?v=4rrf6bk_mOg) Faster Parquet Reading, Asynchronous Connections to Replicas, Trailing Comma before FROM, extractKeyValuePairs, integrations updates, and so much more! Watch it now!
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)

View File

@ -10,9 +10,16 @@ set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
if (SANITIZE)
if (SANITIZE STREQUAL "address")
# LLVM-15 has a bug in Address Sanitizer, preventing the usage of 'sanitize-address-use-after-scope',
# see https://github.com/llvm/llvm-project/issues/58633
set (ASAN_FLAGS "-fsanitize=address -fno-sanitize-address-use-after-scope")
set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
if (COMPILER_CLANG)
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 15 AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 16)
# LLVM-15 has a bug in Address Sanitizer, preventing the usage
# of 'sanitize-address-use-after-scope', see [1].
#
# [1]: https://github.com/llvm/llvm-project/issues/58633
set (ASAN_FLAGS "${ASAN_FLAGS} -fno-sanitize-address-use-after-scope")
endif()
endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")

View File

@ -70,12 +70,14 @@ if (LINKER_NAME)
if (NOT LLD_PATH)
message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.")
endif ()
if (COMPILER_CLANG)
# This is a temporary quirk to emit .debug_aranges with ThinLTO, can be removed after upgrade to clang-16
# This is a temporary quirk to emit .debug_aranges with ThinLTO; it only applies to clang/llvm <16
if (COMPILER_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
set (LLD_WRAPPER "${CMAKE_CURRENT_BINARY_DIR}/ld.lld")
configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
else ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}")
endif()
endif ()

View File

@ -105,6 +105,7 @@ add_contrib (libfarmhash)
add_contrib (icu-cmake icu)
add_contrib (h3-cmake h3)
add_contrib (mariadb-connector-c-cmake mariadb-connector-c)
add_contrib (libfiu-cmake libfiu)
if (ENABLE_TESTS)
add_contrib (googletest-cmake googletest)

1
contrib/libfiu vendored Submodule

@ -0,0 +1 @@
Subproject commit b85edbde4cf974b1b40d27828a56f0505f4e2ee5

View File

@ -0,0 +1,20 @@
if (NOT ENABLE_FIU)
message (STATUS "Not using fiu")
return ()
endif ()
set(FIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/")
set(FIU_SOURCES
${FIU_DIR}/libfiu/fiu.c
${FIU_DIR}/libfiu/fiu-rc.c
${FIU_DIR}/libfiu/backtrace.c
${FIU_DIR}/libfiu/wtable.c
)
set(FIU_HEADERS "${FIU_DIR}/libfiu")
add_library(_fiu ${FIU_SOURCES})
target_compile_definitions(_fiu PUBLIC DUMMY_BACKTRACE)
target_include_directories(_fiu PUBLIC ${FIU_HEADERS})
add_library(ch_contrib::fiu ALIAS _fiu)

2
contrib/vectorscan vendored

@ -1 +1 @@
Subproject commit b4bba94b1a250603b0b198e0394946e32f6c3f30
Subproject commit aaca65aa210ce3ec91bd2b249c4d59e55e80a869

View File

@ -362,17 +362,16 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--compiler",
choices=(
"clang-15",
"clang-15-darwin",
"clang-15-darwin-aarch64",
"clang-15-aarch64",
"clang-15-aarch64-v80compat",
"clang-15-ppc64le",
"clang-15-amd64-compat",
"clang-15-freebsd",
"gcc-11",
"clang-16",
"clang-16-darwin",
"clang-16-darwin-aarch64",
"clang-16-aarch64",
"clang-16-aarch64-v80compat",
"clang-16-ppc64le",
"clang-16-amd64-compat",
"clang-16-freebsd",
),
default="clang-15",
default="clang-16",
help="a compiler to use",
)
parser.add_argument(

View File

@ -10,35 +10,20 @@ RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && apt-get --yes --allow-unauthenticated install libclang-${LLVM_VERSION}-dev libmlir-${LLVM_VERSION}-dev
# libclang-15-dev does not contain proper symlink:
#
# This is what cmake will search for:
#
# # readlink -f /usr/lib/llvm-15/lib/libclang-15.so.1
# /usr/lib/x86_64-linux-gnu/libclang-15.so.1
#
# This is what exists:
#
# # ls -l /usr/lib/x86_64-linux-gnu/libclang-15*
# lrwxrwxrwx 1 root root 16 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so -> libclang-15.so.1
# lrwxrwxrwx 1 root root 21 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15 -> libclang-15.so.15.0.0
# -rw-r--r-- 1 root root 31835760 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15.0.0
#
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
*) exit 1 ;; \
esac \
&& ln -rsf /usr/lib/$rarch-linux-gnu/libclang-15.so.15 /usr/lib/$rarch-linux-gnu/libclang-15.so.1
esac
# repo versions don't work correctly with C++17
# also we push reports to s3, so we add index.html to subfolder urls
# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
RUN git clone --depth=1 https://github.com/ClickHouse/woboq_codebrowser /woboq_codebrowser \
&& cd /woboq_codebrowser \
&& cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} \
&& cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} -DCLANG_BUILTIN_HEADERS_DIR=/usr/lib/llvm-${LLVM_VERSION}/lib/clang/${LLVM_VERSION}/include \
&& ninja
ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator

View File

@ -9,7 +9,7 @@ trap 'kill $(jobs -pr) ||:' EXIT
stage=${stage:-}
# Compiler version, normally set by Dockerfile
export LLVM_VERSION=${LLVM_VERSION:-13}
export LLVM_VERSION=${LLVM_VERSION:-16}
# A variable to pass additional flags to CMake.
# Here we explicitly default it to nothing so that bash doesn't complain about
@ -147,6 +147,7 @@ function clone_submodules
contrib/xxHash
contrib/simdjson
contrib/liburing
contrib/libfiu
)
git submodule sync

View File

@ -15,7 +15,7 @@ stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"
repo_dir=ch
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-15_debug_none_unsplitted_disable_False_binary"}
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-16_debug_none_unsplitted_disable_False_binary"}
BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
function git_clone_with_retry

View File

@ -2,7 +2,7 @@
set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}

View File

@ -2,7 +2,7 @@
set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}

View File

@ -20,9 +20,6 @@ install_packages package_folder
# Thread Fuzzer allows checking more permutations of possible thread scheduling
# and finding more potential issues.
# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540
is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'")
if [ "$is_tsan_build" -eq "0" ]; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
@ -44,7 +41,6 @@ if [ "$is_tsan_build" -eq "0" ]; then
export THREAD_FUZZER_EXPLICIT_SLEEP_PROBABILITY=0.01
export THREAD_FUZZER_EXPLICIT_MEMORY_EXCEPTION_PROBABILITY=0.01
fi
export ZOOKEEPER_FAULT_INJECTION=1
# Initial run without S3 to create system.*_log on local file system to make it

View File

@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
# 15.0.2
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=16
RUN apt-get update \
&& apt-get install \
@ -52,6 +52,7 @@ RUN apt-get update \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \
libclang-${LLVM_VERSION}-dev \
moreutils \
nasm \
ninja-build \

View File

@ -11,14 +11,14 @@ This is intended for continuous integration checks that run on Linux servers. If
The cross-build for macOS is based on the [Build instructions](../development/build.md), follow them first.
## Install Clang-15
## Install Clang-16
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example, the commands for Bionic are:
``` bash
sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-15 main" >> /etc/apt/sources.list
sudo apt-get install clang-15
sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-16 main" >> /etc/apt/sources.list
sudo apt-get install clang-16
```
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
@ -55,7 +55,7 @@ curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX1
cd ClickHouse
mkdir build-darwin
cd build-darwin
CC=clang-15 CXX=clang++-15 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake ..
CC=clang-16 CXX=clang++-16 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake ..
ninja
```

View File

@ -11,7 +11,7 @@ This is for the case when you have Linux machine and want to use it to build `cl
The cross-build for RISC-V 64 is based on the [Build instructions](../development/build.md), follow them first.
## Install Clang-13
## Install Clang-16
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do
```
@ -23,7 +23,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
``` bash
cd ClickHouse
mkdir build-riscv64
CC=clang-14 CXX=clang++-14 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF
CC=clang-16 CXX=clang++-16 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF
ninja -C build-riscv64
```

View File

@ -47,8 +47,8 @@ GCC as a compiler is not supported
To build with a specific Clang version:
``` bash
export CC=clang-15
export CXX=clang++-15
export CC=clang-16
export CXX=clang++-16
```
### Checkout ClickHouse Sources {#checkout-clickhouse-sources}

View File

@ -102,7 +102,7 @@ Builds ClickHouse in various configurations for use in further steps. You have t
### Report Details
- **Compiler**: `clang-15`, optionally with the name of a target platform
- **Compiler**: `clang-16`, optionally with the name of a target platform
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Status**: `success` or `fail`

View File

@ -152,7 +152,7 @@ While inside the `build` directory, configure your build by running CMake. Befor
export CC=clang CXX=clang++
cmake ..
If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-15 CXX=clang++-15`. The clang version will be in the script output.
If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-16 CXX=clang++-16`. The clang version will be in the script output.
The `CC` variable specifies the compiler for C (short for C Compiler), and the `CXX` variable specifies which C++ compiler is to be used for building.

View File

@ -38,6 +38,10 @@ Structure of the `users` section:
</table_name>
</database_name>
</databases>
<grants>
<query>GRANT SELECT ON system.*</query>
</grants>
</user_name>
<!-- Other users settings -->
</users>
@ -86,6 +90,28 @@ Possible values:
Default value: 0.
### grants {#grants-user-setting}
This setting allows granting any rights to the selected user.
Each element of the list should be a `GRANT` query without any grantees specified.
Example:
```xml
<user1>
<grants>
<query>GRANT SHOW ON *.*</query>
<query>GRANT CREATE ON *.* WITH GRANT OPTION</query>
<query>GRANT SELECT ON system.*</query>
</grants>
</user1>
```
This setting can't be specified together with the
`dictionaries`, `access_management`, `named_collection_control`, `show_named_collections_secrets`,
and `allow_databases` settings.
### user_name/networks {#user-namenetworks}
List of networks from which the user can connect to the ClickHouse server.

View File

@ -1125,6 +1125,12 @@ If unsuccessful, several attempts are made to connect to various replicas.
Default value: 1000.
## connect_timeout_with_failover_secure_ms
Connection timeout for selecting the first healthy replica (for secure connections).
Default value: 1000.
## connection_pool_max_wait_ms {#connection-pool-max-wait-ms}
The wait time in milliseconds for a connection when the connection pool is full.
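A minimal sketch of adjusting it per session (the value is illustrative):
```sql
SET connection_pool_max_wait_ms = 5000;
```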
@ -1630,7 +1636,7 @@ For not replicated tables see [non_replicated_deduplication_window](merge-tree-s
### async_insert {#async-insert}
Enables or disables asynchronous inserts. This makes sense only for insertion over the HTTP protocol. Note that deduplication doesn't work for such inserts.
Enables or disables asynchronous inserts. Note that deduplication is disabled by default, see [async_insert_deduplicate](#async-insert-deduplicate).
If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables.
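A minimal sketch of enabling it for a session (the table name `t` and the inserted value are hypothetical):
```sql
SET async_insert = 1;
SET wait_for_async_insert = 1; -- make the INSERT return only after the batch is flushed
INSERT INTO t VALUES (42);
```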
@ -3562,7 +3568,7 @@ Default value: `1`.
If the setting is set to `0`, the table function does not make Nullable columns and inserts default values instead of NULL. This is also applicable for NULL values inside arrays.
## allow_experimental_projection_optimization {#allow-experimental-projection-optimization}
## optimize_use_projections {#optimize_use_projections}
Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md/#projections) optimization when processing `SELECT` queries.
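For example:
```sql
SET optimize_use_projections = 1;
```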
@ -3575,7 +3581,7 @@ Default value: `1`.
## force_optimize_projection {#force-optimize-projection}
Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md/#projections) in `SELECT` queries, when projection optimization is enabled (see [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting).
Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md/#projections) in `SELECT` queries, when projection optimization is enabled (see [optimize_use_projections](#optimize_use_projections) setting).
Possible values:

View File

@ -215,7 +215,7 @@ Cache **system tables**:
Cache **commands**:
- `SYSTEM DROP FILESYSTEM CACHE (<path>) (ON CLUSTER)`
- `SYSTEM DROP FILESYSTEM CACHE (<cache_name>) (ON CLUSTER)` -- `ON CLUSTER` is only supported when no `<cache_name>` is provided
- `SHOW FILESYSTEM CACHES` -- show the list of filesystem caches configured on the server. (For versions <= `22.8` the command is named `SHOW CACHES`)
@ -231,10 +231,10 @@ Result:
└───────────┘
```
- `DESCRIBE CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW CACHES` command. (For versions <= `22.8` the command is named `DESCRIBE CACHE`)
- `DESCRIBE FILESYSTEM CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW FILESYSTEM CACHES` command. (For versions <= `22.8` the command is named `DESCRIBE CACHE`)
```sql
DESCRIBE CACHE 's3_cache'
DESCRIBE FILESYSTEM CACHE 's3_cache'
```
``` text

View File

@ -0,0 +1,55 @@
---
slug: /en/sql-reference/aggregate-functions/reference/first_value
sidebar_position: 7
---
# first_value
Selects the first encountered value, similar to `any`, but it can accept NULL.
## Examples
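The examples below use a small table `test_data`; its schema is not given on this page, so the following is one possible (hypothetical) definition:
```sql
CREATE TABLE test_data (a Int64, b Nullable(Int64)) ENGINE = Memory;
```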
```sql
insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null)
```
### Example 1
The NULL value is ignored by default.
```sql
select first_value(b) from test_data
```
```text
┌─first_value_ignore_nulls(b)─┐
│ 3 │
└─────────────────────────────┘
```
### Example 2
The NULL value is ignored.
```sql
select first_value(b) ignore nulls from test_data
```
```text
┌─first_value_ignore_nulls(b)─┐
│ 3 │
└─────────────────────────────┘
```
### Example 3
The NULL value is accepted.
```sql
select first_value(b) respect nulls from test_data
```
```text
┌─first_value_respect_nulls(b)─┐
│ ᴺᵁᴸᴸ │
└──────────────────────────────┘
```

View File

@ -0,0 +1,48 @@
---
slug: /en/sql-reference/aggregate-functions/reference/greatest
title: greatest
---
Aggregate function that returns the greatest across a list of values. All of the list members must be of comparable types.
Examples:
```sql
SELECT
toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.)),
greatest(1, 2, toUInt8(3), 3.)
```
```response
┌─toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.))─┬─greatest(1, 2, toUInt8(3), 3.)─┐
│ Float64 │ 3 │
└─────────────────────────────────────────────────────┴────────────────────────────────┘
```
:::note
The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison.
:::
```sql
SELECT greatest(['hello'], ['there'], ['world'])
```
```response
┌─greatest(['hello'], ['there'], ['world'])─┐
│ ['world'] │
└───────────────────────────────────────────┘
```
```sql
SELECT greatest(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3))
```
```response
┌─greatest(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐
│ 2023-05-12 01:16:59.000 │
└───────────────────────────────────────────────────────────────────────────────┘
```
:::note
The type returned is a DateTime64 as the DateTime32 must be promoted to 64 bit for the comparison.
:::
Also see [least](/docs/en/sql-reference/aggregate-functions/reference/least.md).

View File

@ -26,6 +26,8 @@ ClickHouse-specific aggregate functions:
- [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md)
- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md)
- [first_value](../../../sql-reference/aggregate-functions/reference/first_value.md)
- [last_value](../../../sql-reference/aggregate-functions/reference/last_value.md)
- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md)
- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md)
- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md)

View File

@ -0,0 +1,53 @@
---
slug: /en/sql-reference/aggregate-functions/reference/last_value
sidebar_position: 8
---
# last_value
Selects the last encountered value, similar to `anyLast`, but it can accept NULL.
## Examples
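The examples below reuse the `test_data` table; see the hypothetical schema sketched on the first_value page (`a Int64, b Nullable(Int64)`).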
```sql
insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null)
```
### Example 1
The NULL value is ignored by default.
```sql
select last_value(b) from test_data
```
```text
┌─last_value_ignore_nulls(b)─┐
│ 5 │
└────────────────────────────┘
```
### Example 2
The NULL value is ignored.
```sql
select last_value(b) ignore nulls from test_data
```
```text
┌─last_value_ignore_nulls(b)─┐
│ 5 │
└────────────────────────────┘
```
### Example 3
The NULL value is accepted.
```sql
select last_value(b) respect nulls from test_data
```
```text
┌─last_value_respect_nulls(b)─┐
│ ᴺᵁᴸᴸ │
└─────────────────────────────┘
```

View File

@ -0,0 +1,48 @@
---
slug: /en/sql-reference/aggregate-functions/reference/least
title: least
---
Aggregate function that returns the least across a list of values. All of the list members must be of comparable types.
Examples:
```sql
SELECT
toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.)),
least(1, 2, toUInt8(3), 3.)
```
```response
┌─toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.))─┬─least(1, 2, toUInt8(3), 3.)─┐
│ Float64 │ 1 │
└──────────────────────────────────────────────────┴─────────────────────────────┘
```
:::note
The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison.
:::
```sql
SELECT least(['hello'], ['there'], ['world'])
```
```response
┌─least(['hello'], ['there'], ['world'])─┐
│ ['hello'] │
└────────────────────────────────────────┘
```
```sql
SELECT least(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3))
```
```response
┌─least(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐
│ 2023-05-12 01:16:59.000 │
└────────────────────────────────────────────────────────────────────────────┘
```
:::note
The type returned is a DateTime64 as the DateTime32 must be promoted to 64 bit for the comparison.
:::
Also see [greatest](/docs/en/sql-reference/aggregate-functions/reference/greatest.md).

View File

@ -2218,8 +2218,6 @@ LAYOUT(regexp_tree)
...
```
We only allow `YAMLRegExpTree` to work with the regexp_tree dictionary layout. If you want to use other sources, please set `regexp_dict_allow_other_sources` to true.
**Source**
We introduce a type of source called `YAMLRegExpTree` representing the structure of Regexp Tree dictionary. An Example of a valid yaml config is like:

View File

@ -59,244 +59,6 @@ A lambda function that accepts multiple arguments can also be passed to a higher
For some functions the first argument (the lambda function) can be omitted. In this case, identical mapping is assumed.
## SQL User Defined Functions
## User Defined Functions (UDFs)
Custom functions from lambda expressions can be created using the [CREATE FUNCTION](../statements/create/function.md) statement. To delete these functions use the [DROP FUNCTION](../statements/drop.md#drop-function) statement.
## Executable User Defined Functions
ClickHouse can call any external executable program or script to process data.
The configuration of executable user defined functions can be located in one or more xml-files. The path to the configuration is specified in the [user_defined_executable_functions_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_defined_executable_functions_config) parameter.
A function configuration contains the following settings:
- `name` - a function name.
- `command` - script name to execute or command if `execute_direct` is false.
- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of a returned value.
- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `pool_size` - the size of a command pool. Optional. Default value is `16`.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter.
The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is, after processing a chunk of arguments it must wait for the next chunk.
**Example**
Creating `test_function` using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
<name>value</name>
</argument>
<format>TabSeparated</format>
<command>test_function.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function.py` (`/var/lib/clickhouse/user_scripts/test_function.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == '__main__':
for line in sys.stdin:
print("Value " + line, end='')
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_python(toUInt64(2));
```
Result:
``` text
┌─test_function_python(2)─┐
│ Value 2 │
└─────────────────────────┘
```
Creating `test_function_sum`, manually specifying `execute_direct` as `0`, using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum</name>
<return_type>UInt64</return_type>
<argument>
<type>UInt64</type>
<name>lhs</name>
</argument>
<argument>
<type>UInt64</type>
<name>rhs</name>
</argument>
<format>TabSeparated</format>
<command>cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table"</command>
<execute_direct>0</execute_direct>
</function>
</functions>
```
Query:
``` sql
SELECT test_function_sum(2, 2);
```
Result:
``` text
┌─test_function_sum(2, 2)─┐
│ 4 │
└─────────────────────────┘
```
Creating `test_function_sum_json` with named arguments and format [JSONEachRow](../../interfaces/formats.md#jsoneachrow) using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum_json</name>
<return_type>UInt64</return_type>
<return_name>result_name</return_name>
<argument>
<type>UInt64</type>
<name>argument_1</name>
</argument>
<argument>
<type>UInt64</type>
<name>argument_2</name>
</argument>
<format>JSONEachRow</format>
<command>test_function_sum_json.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_sum_json.py` (`/var/lib/clickhouse/user_scripts/test_function_sum_json.py` with default path settings).
```python
#!/usr/bin/python3
import sys
import json
if __name__ == '__main__':
for line in sys.stdin:
value = json.loads(line)
first_arg = int(value['argument_1'])
second_arg = int(value['argument_2'])
result = {'result_name': first_arg + second_arg}
print(json.dumps(result), end='\n')
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_sum_json(2, 2);
```
Result:
``` text
┌─test_function_sum_json(2, 2)─┐
│ 4 │
└──────────────────────────────┘
```
Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type).
File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_parameter_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
</argument>
<format>TabSeparated</format>
<command>test_function_parameter_python.py {test_parameter:UInt64}</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_parameter_python.py` (`/var/lib/clickhouse/user_scripts/test_function_parameter_python.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == "__main__":
for line in sys.stdin:
print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="")
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_parameter_python(1)(2);
```
Result:
``` text
┌─test_function_parameter_python(1)(2)─┐
│ Parameter 1 value 2 │
└──────────────────────────────────────┘
```
## Error Handling
Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query.
## Evaluation of Argument Expressions
In almost all programming languages, one of the arguments might not be evaluated for certain operators. This is usually the operators `&&`, `||`, and `?:`.
But in ClickHouse, arguments of functions (operators) are always evaluated. This is because entire parts of columns are evaluated at once, instead of calculating each row separately.
## Performing Functions for Distributed Query Processing
For distributed query processing, as many stages of query processing as possible are performed on remote servers, and the rest of the stages (merging intermediate results and everything after that) are performed on the requestor server.
This means that functions can be performed on different servers.
For example, in the query `SELECT f(sum(g(x))) FROM distributed_table GROUP BY h(y)`,
- if a `distributed_table` has at least two shards, the functions g and h are performed on remote servers, and the function f is performed on the requestor server.
- if a `distributed_table` has only one shard, all the f, g, and h functions are performed on this shard's server.
The result of a function usually does not depend on which server it is performed on. However, sometimes this is important.
For example, functions that work with dictionaries use the dictionary that exists on the server they are running on.
Another example is the `hostName` function, which returns the name of the server it is running on in order to make `GROUP BY` by servers in a `SELECT` query.
If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in the `any` aggregate function or add it to a key in `GROUP BY`.
## Related Content
- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)
ClickHouse supports user-defined functions. See [UDFs](/docs/en/sql-reference/functions/udf.md).

View File

@ -0,0 +1,249 @@
---
slug: /en/sql-reference/functions/udf
sidebar_position: 15
sidebar_label: UDF
---
# User Defined Functions (UDFs)
## Executable User Defined Functions
ClickHouse can call any external executable program or script to process data.
The configuration of executable user defined functions can be located in one or more xml-files. The path to the configuration is specified in the [user_defined_executable_functions_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_defined_executable_functions_config) parameter.
A function configuration contains the following settings:
- `name` - a function name.
- `command` - script name to execute or command if `execute_direct` is false.
- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of a returned value.
- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `pool_size` - the size of a command pool. Optional. Default value is `16`.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter.
The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is, after processing a chunk of arguments it must wait for the next chunk.
**Example**
Creating `test_function` using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
<name>value</name>
</argument>
<format>TabSeparated</format>
<command>test_function.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function.py` (`/var/lib/clickhouse/user_scripts/test_function.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == '__main__':
for line in sys.stdin:
print("Value " + line, end='')
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_python(toUInt64(2));
```
Result:
``` text
┌─test_function_python(2)─┐
│ Value 2 │
└─────────────────────────┘
```
Creating `test_function_sum`, manually specifying `execute_direct` as `0`, using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum</name>
<return_type>UInt64</return_type>
<argument>
<type>UInt64</type>
<name>lhs</name>
</argument>
<argument>
<type>UInt64</type>
<name>rhs</name>
</argument>
<format>TabSeparated</format>
<command>cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table"</command>
<execute_direct>0</execute_direct>
</function>
</functions>
```
Query:
``` sql
SELECT test_function_sum(2, 2);
```
Result:
``` text
┌─test_function_sum(2, 2)─┐
│ 4 │
└─────────────────────────┘
```
Creating `test_function_sum_json` with named arguments and format [JSONEachRow](../../interfaces/formats.md#jsoneachrow) using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum_json</name>
<return_type>UInt64</return_type>
<return_name>result_name</return_name>
<argument>
<type>UInt64</type>
<name>argument_1</name>
</argument>
<argument>
<type>UInt64</type>
<name>argument_2</name>
</argument>
<format>JSONEachRow</format>
<command>test_function_sum_json.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_sum_json.py` (`/var/lib/clickhouse/user_scripts/test_function_sum_json.py` with default path settings).
```python
#!/usr/bin/python3
import sys
import json
if __name__ == '__main__':
for line in sys.stdin:
value = json.loads(line)
first_arg = int(value['argument_1'])
second_arg = int(value['argument_2'])
result = {'result_name': first_arg + second_arg}
print(json.dumps(result), end='\n')
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_sum_json(2, 2);
```
Result:
``` text
┌─test_function_sum_json(2, 2)─┐
│ 4 │
└──────────────────────────────┘
```
Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type).
File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_parameter_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
</argument>
<format>TabSeparated</format>
<command>test_function_parameter_python.py {test_parameter:UInt64}</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_parameter_python.py` (`/var/lib/clickhouse/user_scripts/test_function_parameter_python.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == "__main__":
for line in sys.stdin:
print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="")
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_parameter_python(1)(2);
```
Result:
``` text
┌─test_function_parameter_python(1)(2)─┐
│ Parameter 1 value 2 │
└──────────────────────────────────────┘
```
## Error Handling
Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query.
## Evaluation of Argument Expressions
In almost all programming languages, one of the arguments might not be evaluated for certain operators. This is usually the operators `&&`, `||`, and `?:`.
But in ClickHouse, arguments of functions (operators) are always evaluated. This is because entire parts of columns are evaluated at once, instead of calculating each row separately.
## Performing Functions for Distributed Query Processing
For distributed query processing, as many stages of query processing as possible are performed on remote servers, and the rest of the stages (merging intermediate results and everything after that) are performed on the requestor server.
This means that functions can be performed on different servers.
For example, in the query `SELECT f(sum(g(x))) FROM distributed_table GROUP BY h(y)`,
- if a `distributed_table` has at least two shards, the functions g and h are performed on remote servers, and the function f is performed on the requestor server.
- if a `distributed_table` has only one shard, all the f, g, and h functions are performed on this shard's server.
The result of a function usually does not depend on which server it is performed on. However, sometimes this is important.
For example, functions that work with dictionaries use the dictionary that exists on the server they are running on.
Another example is the `hostName` function, which returns the name of the server it is running on in order to make `GROUP BY` by servers in a `SELECT` query.
If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in the `any` aggregate function or add it to a key in `GROUP BY`.
## SQL User Defined Functions
Custom functions from lambda expressions can be created using the [CREATE FUNCTION](../statements/create/function.md) statement. To delete these functions use the [DROP FUNCTION](../statements/drop.md#drop-function) statement.
## Related Content
### [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)

View File

@ -103,7 +103,11 @@ ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM tab
```
This query copies the data partition from `table1` to `table2`.
Note that data will be deleted neither from `table1` nor from `table2`.
Note that:
- Data will be deleted neither from `table1` nor from `table2`.
- `table1` may be a temporary table.
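A minimal sketch with a concrete (hypothetical) partition expression:
```sql
ALTER TABLE table2 ATTACH PARTITION 202305 FROM table1;
```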
For the query to run successfully, the following conditions must be met:
@ -117,7 +121,12 @@ For the query to run successfully, the following conditions must be met:
ALTER TABLE table2 [ON CLUSTER cluster] REPLACE PARTITION partition_expr FROM table1
```
This query copies the data partition from `table1` to `table2` and replaces the existing partition in `table2`. Note that data won't be deleted from `table1`.
This query copies the data partition from `table1` to `table2` and replaces the existing partition in `table2`.
Note that:
- Data won't be deleted from `table1`.
- `table1` may be a temporary table.
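Analogously, a hypothetical invocation:
```sql
ALTER TABLE table2 REPLACE PARTITION 202305 FROM table1;
```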
For the query to run successfully, the following conditions must be met:

View File

@ -61,4 +61,6 @@ Result:
## Related Content
- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)
### [Executable UDFs](/docs/en/sql-reference/functions/udf.md)
### [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)

View File

@ -12,7 +12,7 @@ Compressed files are supported. Compression type is detected by the extension of
**Syntax**
```sql
SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [COMPRESSION type [LEVEL level]]
SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [APPEND] [COMPRESSION type [LEVEL level]]
```
`file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
@ -25,6 +25,7 @@ SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [COMPRESSION type [LEVEL
- The query will fail if a file with the same file name already exists.
- The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it.
- If `AND STDOUT` is mentioned in the query then the output that is written to the file is also displayed on standard output. If used with compression, the plaintext is displayed on standard output.
- If `APPEND` is mentioned in the query then the output is appended to an existing file. `APPEND` cannot be combined with compression.
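A sketch of the new clause (the file name is hypothetical):
```sql
SELECT 1 INTO OUTFILE 'result.tsv' APPEND;
```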
**Example**

View File

@ -1,7 +1,7 @@
---
slug: /en/sql-reference/table-functions/dictionary
sidebar_position: 54
sidebar_label: dictionary function
sidebar_label: dictionary
title: dictionary
---

View File

@ -391,7 +391,7 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT
## Projections {#projections}
Projections are similar to [materialized views](../../../sql-reference/statements/create/view.md#materialized), but they are defined at the level of data parts. This provides consistency guarantees along with automatic use in queries.
Projections are an experimental feature. To enable projection support, set the [allow_experimental_projection_optimization](../../../operations/settings/settings.md#allow-experimental-projection-optimization) setting to `1`. See also the [force_optimize_projection](../../../operations/settings/settings.md#force-optimize-projection) setting.
Projections are an experimental feature. To enable projection support, set the [optimize_use_projections](../../../operations/settings/settings.md#allow-experimental-projection-optimization) setting to `1`. See also the [force_optimize_projection](../../../operations/settings/settings.md#optimize_use_projections) setting.
Projections are not supported for `SELECT` queries with the [FINAL](../../../sql-reference/statements/select/from.md#select-from-final) modifier.

View File

@ -37,6 +37,10 @@ sidebar_label: "User settings"
<table_name>
</database_name>
</databases>
<grants>
<query>GRANT SELECT ON system.*</query>
</grants>
</user_name>
<!-- Other users settings -->
</users>
@ -89,6 +93,27 @@ sidebar_label: "User settings"
Default value: 0.
### grants {#grants-user-setting}
This setting allows specifying a set of rights for the given user.
Each element of the list should be a `GRANT` query without any grantees specified in the query itself.
Example:
```xml
<user1>
<grants>
<query>GRANT SHOW ON *.*</query>
<query>GRANT CREATE ON *.* WITH GRANT OPTION</query>
<query>GRANT SELECT ON system.*</query>
</grants>
</user1>
```
This setting can't be specified together with the
`dictionaries`, `access_management`, `named_collection_control`, `show_named_collections_secrets`,
or `allow_databases` settings.
### user_name/networks {#user-namenetworks}
A list of networks from which the user can connect to the ClickHouse server.

View File

@ -3588,7 +3588,7 @@ SETTINGS index_granularity = 8192 │
A string with the identifier of the snapshot from which the [initial dump of PostgreSQL tables](../../engines/database-engines/materialized-postgresql.md) will be performed. This setting must be used together with [materialized_postgresql_replication_slot](#materialized-postgresql-replication-slot).
## allow_experimental_projection_optimization {#allow-experimental-projection-optimization}
## optimize_use_projections {#optimize_use_projections}
Enables or disables support for [projections](../../engines/table-engines/mergetree-family/mergetree.md#projections) when processing `SELECT` queries.
@ -3601,7 +3601,7 @@ SETTINGS index_granularity = 8192 │
## force_optimize_projection {#force-optimize-projection}
Enables or disables the mandatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md#projections) in `SELECT` queries when projection support is enabled (see the [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting).
Enables or disables the mandatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md#projections) in `SELECT` queries when projection support is enabled (see the [optimize_use_projections](#optimize_use_projections) setting).
Possible values:

View File

@ -102,7 +102,11 @@ ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM tab
```
Copies the data partition from table `table1` to table `table2`.
Note that data is deleted neither from `table1` nor from `table2`.
Note that:
- Data is deleted neither from `table1` nor from `table2`.
- `table1` may be a temporary table.
Keep in mind:
@ -118,7 +122,12 @@ ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM tab
ALTER TABLE table2 [ON CLUSTER cluster] REPLACE PARTITION partition_expr FROM table1
```
Copies the partition from table `table1` to table `table2`, replacing the existing data in `table2`. Data from `table1` is not deleted.
Copies the partition from table `table1` to table `table2`, replacing the existing data in `table2`.
Note that:
- Data from `table1` is not deleted.
- `table1` may be a temporary table.
Keep in mind:

View File

@ -1074,7 +1074,7 @@ The corresponding trace log in the ClickHouse server log file confirms that ClickHouse is
<a href="https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/#projections" target="_blank">Projections</a> are currently an experimental feature, so we need to tell ClickHouse:
```sql
SET allow_experimental_projection_optimization = 1;
SET optimize_use_projections = 1;
```

View File

@ -1395,8 +1395,8 @@ try
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config(), socket, listen_host, port);
socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC));
socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC));
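// Wrapping the value in Poco::Timespan(seconds, 0) matters: a bare integer would
// invoke Timespan's single-argument constructor, which interprets it as microseconds.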
socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0));
socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0));
return ProtocolServerAdapter(
listen_host,
port_name,
@ -1418,8 +1418,8 @@ try
#if USE_SSL
Poco::Net::SecureServerSocket socket;
auto address = socketBindListen(config(), socket, listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC));
socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC));
socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0));
socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0));
return ProtocolServerAdapter(
listen_host,
secure_port_name,

View File

@ -185,6 +185,7 @@ enum class AccessType
M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \
M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \
M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \
M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT", GLOBAL, SYSTEM) \
M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \
\
M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\
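The new access type is granted like any other SYSTEM privilege; a sketch with a hypothetical user:

```sql
-- Covers both SYSTEM ENABLE FAILPOINT and SYSTEM DISABLE FAILPOINT
GRANT SYSTEM FAILPOINT ON *.* TO test_user;
```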

View File

@ -11,6 +11,10 @@
#include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h>
#include <Core/Settings.h>
#include <Interpreters/executeQuery.h>
#include <Parsers/Access/ASTGrantQuery.h>
#include <Parsers/Access/ParserGrantQuery.h>
#include <Parsers/parseQuery.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Poco/MD5Engine.h>
#include <Poco/JSON/JSON.h>
@ -49,7 +53,12 @@ namespace
UUID generateID(const IAccessEntity & entity) { return generateID(entity.getType(), entity.getName()); }
UserPtr parseUser(const Poco::Util::AbstractConfiguration & config, const String & user_name, const std::unordered_set<UUID> & allowed_profile_ids, bool allow_no_password, bool allow_plaintext_password)
UserPtr parseUser(
const Poco::Util::AbstractConfiguration & config,
const String & user_name,
const std::unordered_set<UUID> & allowed_profile_ids,
bool allow_no_password,
bool allow_plaintext_password)
{
auto user = std::make_shared<User>();
user->setName(user_name);
@ -207,6 +216,65 @@ namespace
}
}
const auto grants_config = user_config + ".grants";
std::optional<Strings> grant_queries;
if (config.has(grants_config))
{
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(grants_config, keys);
grant_queries.emplace();
grant_queries->reserve(keys.size());
for (const auto & key : keys)
{
const auto query = config.getString(grants_config + "." + key);
grant_queries->push_back(query);
}
}
bool access_management = config.getBool(user_config + ".access_management", false);
bool named_collection_control = config.getBool(user_config + ".named_collection_control", false);
bool show_named_collections_secrets = config.getBool(user_config + ".show_named_collections_secrets", false);
if (grant_queries)
if (databases || dictionaries || access_management || named_collection_control || show_named_collections_secrets)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Any other access control settings can't be specified with `grants`");
if (grant_queries)
{
ParserGrantQuery parser;
parser.parseWithoutGrantees();
for (const auto & string_query : *grant_queries)
{
String error_message;
const char * pos = string_query.data();
auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, 0);
if (!ast)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse grant query. Error: {}", error_message);
auto & query = ast->as<ASTGrantQuery &>();
if (query.roles)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Roles can't be granted in config file");
if (!query.cluster.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Can't grant on cluster using config file");
if (query.grantees)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "You can't specify grantees in query using config file");
for (auto & element : query.access_rights_elements)
{
if (query.is_revoke)
user->access.revoke(element);
else
user->access.grant(element);
}
}
}
else
{
/// By default all databases are accessible
/// and the user can grant everything he has.
user->access.grantWithGrantOption(AccessType::ALL);
@ -226,24 +294,22 @@ namespace
user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG, dictionary);
}
bool access_management = config.getBool(user_config + ".access_management", false);
if (!access_management)
{
user->access.revoke(AccessType::ACCESS_MANAGEMENT);
user->access.revokeGrantOption(AccessType::ALL);
}
bool named_collection_control = config.getBool(user_config + ".named_collection_control", false);
if (!named_collection_control)
{
user->access.revoke(AccessType::NAMED_COLLECTION_CONTROL);
}
bool show_named_collections_secrets = config.getBool(user_config + ".show_named_collections_secrets", false);
if (!show_named_collections_secrets)
{
user->access.revoke(AccessType::SHOW_NAMED_COLLECTIONS_SECRETS);
}
}
String default_database = config.getString(user_config + ".default_database", "");
user->default_database = default_database;
@ -252,7 +318,11 @@ namespace
}
std::vector<AccessEntityPtr> parseUsers(const Poco::Util::AbstractConfiguration & config, const std::unordered_set<UUID> & allowed_profile_ids, bool allow_no_password, bool allow_plaintext_password)
std::vector<AccessEntityPtr> parseUsers(
const Poco::Util::AbstractConfiguration & config,
const std::unordered_set<UUID> & allowed_profile_ids,
bool allow_no_password,
bool allow_plaintext_password)
{
Poco::Util::AbstractConfiguration::Keys user_names;
config.keys("users", user_names);

View File

@ -14,11 +14,29 @@ AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData>(name, argument_types, parameters, settings));
}
template <bool RespectNulls = false>
AggregateFunctionPtr createAggregateFunctionNullableAny(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(
createAggregateFunctionSingleNullableValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData, RespectNulls>(
name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData>(name, argument_types, parameters, settings));
}
template <bool RespectNulls = false>
AggregateFunctionPtr createAggregateFunctionNullableAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleNullableValue<
AggregateFunctionsSingleValue,
AggregateFunctionAnyLastData,
RespectNulls>(name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyHeavyData>(name, argument_types, parameters, settings));
@ -38,9 +56,15 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
factory.registerFunction("first_value",
{ createAggregateFunctionAny, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("first_value_respect_nulls",
{ createAggregateFunctionNullableAny<true>, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("last_value",
{ createAggregateFunctionAnyLast, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("last_value_respect_nulls",
{ createAggregateFunctionNullableAnyLast<true>, properties },
AggregateFunctionFactory::CaseInsensitive);
}
}
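The behavioral difference between the plain and `_respect_nulls` variants shows up on nullable input (a sketch):

```sql
SELECT
    first_value(x),               -- skips NULLs: returns 1
    first_value_respect_nulls(x)  -- keeps NULLs: returns NULL
FROM values('x Nullable(UInt8)', NULL, 1, 2);
```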

View File

@ -768,19 +768,23 @@ static_assert(
/// For any other value types.
template <bool IS_NULLABLE = false>
struct SingleValueDataGeneric
{
private:
using Self = SingleValueDataGeneric;
Field value;
bool has_value = false;
public:
static constexpr bool is_nullable = false;
static constexpr bool is_nullable = IS_NULLABLE;
static constexpr bool is_any = false;
bool has() const
{
if constexpr (is_nullable)
return has_value;
return !value.isNull();
}
@ -815,11 +819,15 @@ public:
void change(const IColumn & column, size_t row_num, Arena *)
{
column.get(row_num, value);
if constexpr (is_nullable)
has_value = true;
}
void change(const Self & to, Arena *)
{
value = to.value;
if constexpr (is_nullable)
has_value = true;
}
bool changeFirstTime(const IColumn & column, size_t row_num, Arena * arena)
@ -835,7 +843,7 @@ public:
bool changeFirstTime(const Self & to, Arena * arena)
{
if (!has() && to.has())
if (!has() && (is_nullable || to.has()))
{
change(to, arena);
return true;
@ -869,6 +877,20 @@ public:
return true;
}
else
{
if constexpr (is_nullable)
{
Field new_value;
column.get(row_num, new_value);
if (!value.isNull() && (new_value.isNull() || new_value < value))
{
value = new_value;
return true;
}
else
return false;
}
else
{
Field new_value;
column.get(row_num, new_value);
@ -881,10 +903,29 @@ public:
return false;
}
}
}
bool changeIfLess(const Self & to, Arena * arena)
{
if (to.has() && (!has() || to.value < value))
if (!to.has())
return false;
if constexpr (is_nullable)
{
if (!has())
{
change(to, arena);
return true;
}
if (to.value.isNull() || (!value.isNull() && to.value < value))
{
value = to.value;
return true;
}
return false;
}
else
{
if (!has() || to.value < value)
{
change(to, arena);
return true;
@ -892,6 +933,7 @@ public:
else
return false;
}
}
bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena)
{
@ -901,6 +943,19 @@ public:
return true;
}
else
{
if constexpr (is_nullable)
{
Field new_value;
column.get(row_num, new_value);
if (!value.isNull() && (new_value.isNull() || value < new_value))
{
value = new_value;
return true;
}
return false;
}
else
{
Field new_value;
column.get(row_num, new_value);
@ -913,10 +968,24 @@ public:
return false;
}
}
}
bool changeIfGreater(const Self & to, Arena * arena)
{
if (to.has() && (!has() || to.value > value))
if (!to.has())
return false;
if constexpr (is_nullable)
{
if (!value.isNull() && (to.value.isNull() || value < to.value))
{
value = to.value;
return true;
}
return false;
}
else
{
if (!has() || to.value > value)
{
change(to, arena);
return true;
@ -924,6 +993,7 @@ public:
else
return false;
}
}
bool isEqualTo(const IColumn & column, size_t row_num) const
{
@ -1359,6 +1429,17 @@ public:
this->data(place).insertResultInto(to);
}
AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr & nested_function,
const DataTypes & /*arguments*/,
const Array & /*params*/,
const AggregateFunctionProperties & /*properties*/) const override
{
if (Data::is_nullable)
return nested_function;
return nullptr;
}
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override

View File

@ -9,7 +9,6 @@
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
namespace DB
{
struct Settings;
@ -22,7 +21,6 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na
assertUnary(name, argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) return new AggregateFunctionTemplate<Data<SingleValueDataFixed<TYPE>>>(argument_type); /// NOLINT
@ -46,7 +44,28 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na
if (which.idx == TypeIndex::String)
return new AggregateFunctionTemplate<Data<SingleValueDataString>>(argument_type);
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric>>(argument_type);
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<>>>(argument_type);
}
template <template <typename> class AggregateFunctionTemplate, template <typename> class Data, bool RespectNulls = false>
static IAggregateFunction * createAggregateFunctionSingleNullableValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
// If the result value could be NULL (not counting the case where no rows matched),
// use SingleValueDataGeneric.
if constexpr (!RespectNulls)
{
return createAggregateFunctionSingleValue<AggregateFunctionTemplate, Data>(name, argument_types, Array(), settings);
}
else
{
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<true>>>(argument_type);
}
UNREACHABLE();
}
@ -79,7 +98,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMaxSecond(const DataTyp
if (which.idx == TypeIndex::String)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataString>>>(res_type, val_type);
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric>>>(res_type, val_type);
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric<>>>>(res_type, val_type);
}
template <template <typename> class MinMaxData>
@ -115,7 +134,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMax(const String & name
if (which.idx == TypeIndex::String)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataString>(res_type, val_type);
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric>(res_type, val_type);
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric<>>(res_type, val_type);
}
}

View File

@ -162,14 +162,13 @@ private:
class PushOrVisitor
{
public:
PushOrVisitor(ContextPtr context, size_t max_atoms_, size_t num_atoms_)
PushOrVisitor(ContextPtr context, size_t max_atoms_)
: max_atoms(max_atoms_)
, num_atoms(num_atoms_)
, and_resolver(FunctionFactory::instance().get("and", context))
, or_resolver(FunctionFactory::instance().get("or", context))
{}
bool visit(QueryTreeNodePtr & node)
bool visit(QueryTreeNodePtr & node, size_t num_atoms)
{
if (max_atoms && num_atoms > max_atoms)
return false;
@ -187,7 +186,10 @@ public:
{
auto & arguments = function_node->getArguments().getNodes();
for (auto & argument : arguments)
visit(argument);
{
if (!visit(argument, num_atoms))
return false;
}
}
if (name == "or")
@ -217,7 +219,7 @@ public:
auto rhs = createFunctionNode(or_resolver, std::move(other_node), std::move(and_function_arguments[1]));
node = createFunctionNode(and_resolver, std::move(lhs), std::move(rhs));
visit(node);
return visit(node, num_atoms);
}
return true;
@ -225,7 +227,6 @@ public:
private:
size_t max_atoms;
size_t num_atoms;
const FunctionOverloadResolverPtr and_resolver;
const FunctionOverloadResolverPtr or_resolver;
@ -516,8 +517,8 @@ std::optional<CNF> CNF::tryBuildCNF(const QueryTreeNodePtr & node, ContextPtr co
visitor.visit(node_cloned, false);
}
if (PushOrVisitor visitor(context, max_atoms, atom_count);
!visitor.visit(node_cloned))
if (PushOrVisitor visitor(context, max_atoms);
!visitor.visit(node_cloned, atom_count))
return std::nullopt;
CollectGroupsVisitor collect_visitor;

View File

@ -214,14 +214,14 @@ int IBridge::main(const std::vector<std::string> & /*args*/)
Poco::Net::ServerSocket socket;
auto address = socketBindListen(socket, hostname, port, log);
socket.setReceiveTimeout(http_timeout);
socket.setSendTimeout(http_timeout);
socket.setReceiveTimeout(Poco::Timespan(http_timeout, 0));
socket.setSendTimeout(Poco::Timespan(http_timeout, 0));
Poco::ThreadPool server_pool(3, max_server_connections);
Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
http_params->setTimeout(http_timeout);
http_params->setKeepAliveTimeout(keep_alive_timeout);
http_params->setTimeout(Poco::Timespan(http_timeout, 0));
http_params->setKeepAliveTimeout(Poco::Timespan(keep_alive_timeout, 0));
auto shared_context = Context::createShared();
auto context = Context::createGlobal(shared_context.get());

View File

@ -353,6 +353,10 @@ target_link_libraries(clickhouse_common_io
Poco::Foundation
)
if (TARGET ch_contrib::fiu)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::fiu)
endif()
if (TARGET ch_contrib::cpuid)
target_link_libraries(clickhouse_common_io PRIVATE ch_contrib::cpuid)
endif()

View File

@ -573,6 +573,13 @@ try
CompressionMethod compression_method = chooseCompressionMethod(out_file, compression_method_string);
UInt64 compression_level = 3;
if (query_with_output->is_outfile_append && compression_method != CompressionMethod::None)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Cannot append to compressed file. Please use uncompressed file or remove APPEND keyword.");
}
if (query_with_output->compression_level)
{
const auto & compression_level_node = query_with_output->compression_level->as<ASTLiteral &>();
@ -587,8 +594,14 @@ try
range.second);
}
auto flags = O_WRONLY | O_EXCL;
if (query_with_output->is_outfile_append)
flags |= O_APPEND;
else
flags |= O_CREAT;
out_file_buf = wrapWriteBufferWithCompressionMethod(
std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT),
std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, flags),
compression_method,
static_cast<int>(compression_level)
);
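With these flags, `APPEND` reuses an existing file instead of failing on O_EXCL, while appending to a compressed target throws as shown above; a sketch:

```sql
SELECT 1 INTO OUTFILE 'data.tsv';
SELECT 2 INTO OUTFILE 'data.tsv' APPEND;     -- appends to the existing file

SELECT 3 INTO OUTFILE 'data.tsv.gz' APPEND;  -- error: cannot append to a compressed file
```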

View File

@ -264,7 +264,9 @@ void ColumnFunction::appendArgument(const ColumnWithTypeAndName & column)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot capture column {} because it has incompatible type: "
"got {}, but {} is expected.", argument_types.size(), column.type->getName(), argument_types[index]->getName());
captured_columns.push_back(column);
auto captured_column = column;
captured_column.column = captured_column.column->convertToFullColumnIfSparse();
captured_columns.push_back(std::move(captured_column));
}
DataTypePtr ColumnFunction::getResultType() const

View File

@ -1,30 +0,0 @@
#include <Common/Documentation.h>
namespace DB
{
std::string Documentation::examplesAsString() const
{
std::string res;
for (const auto & [example_name, example_query] : examples)
{
res += example_name + ":\n\n";
res += "```sql\n";
res += example_query + "\n";
res += "```\n";
}
return res;
}
std::string Documentation::categoriesAsString() const
{
if (categories.empty())
return "";
std::string res = categories[0];
for (size_t i = 1; i < categories.size(); ++i)
res += ", " + categories[i];
return res;
}
}

src/Common/FailPoint.cpp (new file, 166 lines)
View File

@ -0,0 +1,166 @@
#include <Common/Exception.h>
#include <Common/FailPoint.h>
#include <boost/core/noncopyable.hpp>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <optional>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
};
#if FIU_ENABLE
static struct InitFiu
{
InitFiu()
{
fiu_init(0);
}
} init_fiu;
#endif
/// We should define different types of failpoints here. There are four types of them:
/// - ONCE: the failpoint will only be triggered once.
/// - REGULAR: the failpoint will always be triggered until disableFailPoint is called.
/// - PAUSEABLE_ONCE: the failpoint will be blocked one time when pauseFailPoint is called, until disableFailPoint is called.
/// - PAUSEABLE: the failpoint will be blocked every time when pauseFailPoint is called, until disableFailPoint is called.
#define APPLY_FOR_FAILPOINTS(ONCE, REGULAR, PAUSEABLE_ONCE, PAUSEABLE) \
ONCE(replicated_merge_tree_commit_zk_fail_after_op) \
REGULAR(dummy_failpoint) \
PAUSEABLE_ONCE(dummy_pausable_failpoint_once) \
PAUSEABLE(dummy_pausable_failpoint)
namespace FailPoints
{
#define M(NAME) extern const char(NAME)[] = #NAME "";
APPLY_FOR_FAILPOINTS(M, M, M, M)
#undef M
}
std::unordered_map<String, std::shared_ptr<FailPointChannel>> FailPointInjection::fail_point_wait_channels;
std::mutex FailPointInjection::mu;
class FailPointChannel : private boost::noncopyable
{
public:
explicit FailPointChannel(UInt64 timeout_)
: timeout_ms(timeout_)
{}
FailPointChannel()
: timeout_ms(0)
{}
void wait()
{
std::unique_lock lock(m);
if (timeout_ms == 0)
cv.wait(lock);
else
cv.wait_for(lock, std::chrono::milliseconds(timeout_ms));
}
void notifyAll()
{
std::unique_lock lock(m);
cv.notify_all();
}
private:
UInt64 timeout_ms;
std::mutex m;
std::condition_variable cv;
};
void FailPointInjection::enablePauseFailPoint(const String & fail_point_name, UInt64 time_ms)
{
#define SUB_M(NAME, flags) \
if (fail_point_name == FailPoints::NAME) \
{ \
/* FIU_ONETIME -- Only fail once; the point of failure will be automatically disabled afterwards.*/ \
fiu_enable(FailPoints::NAME, 1, nullptr, flags); \
std::lock_guard lock(mu); \
fail_point_wait_channels.try_emplace(FailPoints::NAME, std::make_shared<FailPointChannel>(time_ms)); \
return; \
}
#define ONCE(NAME)
#define REGULAR(NAME)
#define PAUSEABLE_ONCE(NAME) SUB_M(NAME, FIU_ONETIME)
#define PAUSEABLE(NAME) SUB_M(NAME, 0)
APPLY_FOR_FAILPOINTS(ONCE, REGULAR, PAUSEABLE_ONCE, PAUSEABLE)
#undef SUB_M
#undef ONCE
#undef REGULAR
#undef PAUSEABLE_ONCE
#undef PAUSEABLE
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find fail point {}", fail_point_name);
}
void FailPointInjection::pauseFailPoint(const String & fail_point_name)
{
fiu_do_on(fail_point_name.c_str(), FailPointInjection::wait(fail_point_name););
}
void FailPointInjection::enableFailPoint(const String & fail_point_name)
{
#if FIU_ENABLE
#define SUB_M(NAME, flags, pause) \
if (fail_point_name == FailPoints::NAME) \
{ \
/* FIU_ONETIME -- Only fail once; the point of failure will be automatically disabled afterwards.*/ \
fiu_enable(FailPoints::NAME, 1, nullptr, flags); \
if (pause) \
{ \
std::lock_guard lock(mu); \
fail_point_wait_channels.try_emplace(FailPoints::NAME, std::make_shared<FailPointChannel>()); \
} \
return; \
}
#define ONCE(NAME) SUB_M(NAME, FIU_ONETIME, 0)
#define REGULAR(NAME) SUB_M(NAME, 0, 0)
#define PAUSEABLE_ONCE(NAME) SUB_M(NAME, FIU_ONETIME, 1)
#define PAUSEABLE(NAME) SUB_M(NAME, 0, 1)
APPLY_FOR_FAILPOINTS(ONCE, REGULAR, PAUSEABLE_ONCE, PAUSEABLE)
#undef SUB_M
#undef ONCE
#undef REGULAR
#undef PAUSEABLE_ONCE
#undef PAUSEABLE
#endif
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find fail point {}", fail_point_name);
}
void FailPointInjection::disableFailPoint(const String & fail_point_name)
{
std::lock_guard lock(mu);
if (auto iter = fail_point_wait_channels.find(fail_point_name); iter != fail_point_wait_channels.end())
{
/// We cannot rely on the destructor to do the notify_all, because
/// if someone is waiting on this channel, the destructor will never be called.
iter->second->notifyAll();
fail_point_wait_channels.erase(iter);
}
fiu_disable(fail_point_name.c_str());
}
void FailPointInjection::wait(const String & fail_point_name)
{
std::unique_lock lock(mu);
if (auto iter = fail_point_wait_channels.find(fail_point_name); iter == fail_point_wait_channels.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not find channel for fail point {}", fail_point_name);
else
{
lock.unlock();
auto ptr = iter->second;
ptr->wait();
}
};
}

src/Common/FailPoint.h (new file, 53 lines)
View File

@ -0,0 +1,53 @@
#pragma once
#include "config.h"
#include <Common/Exception.h>
#include <Core/Types.h>
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdocumentation"
#pragma clang diagnostic ignored "-Wreserved-macro-identifier"
#endif
#include <fiu.h>
#include <fiu-control.h>
#ifdef __clang__
#pragma clang diagnostic pop
#endif
#include <any>
#include <unordered_map>
namespace DB
{
/// This is a simple named failpoint library inspired by https://github.com/pingcap/tiflash
/// The usage is simple:
/// 1. define failpoint with a 'failpoint_name' in FailPoint.cpp
/// 2. inject failpoint in normal code
/// 2.1 use fiu_do_on which can inject any code blocks, when it is a regular-triggered / once-triggered failpoint
/// 2.2 use pauseFailPoint when it is a pausable failpoint
/// 3. in test file, we can use system failpoint enable/disable 'failpoint_name'
class FailPointChannel;
class FailPointInjection
{
public:
static void pauseFailPoint(const String & fail_point_name);
static void enableFailPoint(const String & fail_point_name);
static void enablePauseFailPoint(const String & fail_point_name, UInt64 time);
static void disableFailPoint(const String & fail_point_name);
static void wait(const String & fail_point_name);
private:
static std::mutex mu;
static std::unordered_map<String, std::shared_ptr<FailPointChannel>> fail_point_wait_channels;
};
}
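Step 3 above maps to the new `SYSTEM_FAILPOINT` commands introduced earlier in this commit; a sketch using the `dummy_failpoint` defined in FailPoint.cpp:

```sql
SYSTEM ENABLE FAILPOINT dummy_failpoint;
-- ... run statements whose code path contains fiu_do_on(FailPoints::dummy_failpoint, ...) ...
SYSTEM DISABLE FAILPOINT dummy_failpoint;
```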

View File

@ -0,0 +1,44 @@
#include <Common/FunctionDocumentation.h>
namespace DB
{
std::string FunctionDocumentation::argumentsAsString() const
{
std::string res;
for (const auto & [name, desc] : arguments)
{
res += "- " + name + ":" + desc + "\n";
}
return res;
}
std::string FunctionDocumentation::examplesAsString() const
{
std::string res;
for (const auto & [name, query, result] : examples)
{
res += name + ":\n\n";
res += "``` sql\n";
res += query + "\n";
res += "```\n\n";
res += "``` text\n";
res += result + "\n";
res += "```\n";
}
return res;
}
std::string FunctionDocumentation::categoriesAsString() const
{
if (categories.empty())
return "";
auto it = categories.begin();
std::string res = *it;
for (++it; it != categories.end(); ++it) /// start past the first category, which is already in res
res += ", " + *it;
return res;
}
}

View File

@ -1,15 +1,14 @@
#pragma once
#include <set>
#include <string>
#include <vector>
#include <map>
namespace DB
{
/** Embedded reference documentation for high-level server components,
* such as SQL functions, table functions, data types, table engines, etc.
/** Embedded reference documentation for functions.
*
* The advantages of embedded documentation are:
* - it is easy to write and update with code;
@ -34,50 +33,49 @@ namespace DB
* - examples (queries that can be referenced from the text by names);
* - categories - one or a few text strings like {"Mathematical", "Array Processing"};
*
* Only the description is mandatory.
*
* The description should be represented in Markdown (or just plaintext).
* Some extensions for Markdown are added:
* - [example:name] will reference to an example with the corresponding name.
*
* Documentation does not support multiple languages.
* The only available language is English.
*
* TODO: Allow to specify Syntax, Argument(s) and a Returned Value.
* TODO: Organize Examples as a struct of ExampleName, ExampleQuery and ExampleResult.
*/
struct Documentation
struct FunctionDocumentation
{
using Description = std::string;
using Syntax = std::string;
using Argument = std::string;
struct Argument
{
std::string name;
std::string description;
};
using Arguments = std::vector<Argument>;
using ReturnedValue = std::string;
using ExampleName = std::string;
using ExampleQuery = std::string;
using Examples = std::map<ExampleName, ExampleQuery>;
struct Example
{
std::string name;
std::string query;
std::string result;
};
using Examples = std::vector<Example>;
using Category = std::string;
using Categories = std::vector<Category>;
using Categories = std::set<Category>;
using Related = std::string;
Description description;
Examples examples;
Categories categories;
Documentation(Description description_) : description(std::move(description_)) {} /// NOLINT
Documentation(Description description_, Examples examples_) : description(std::move(description_)), examples(std::move(examples_)) {}
Documentation(Description description_, Examples examples_, Categories categories_)
: description(std::move(description_)), examples(std::move(examples_)), categories(std::move(categories_)) {}
/// TODO: Please remove this constructor. Documentation should always be non-empty.
Documentation() = default;
Description description; /// E.g. "Returns the position (in bytes, starting at 1) of a substring needle in a string haystack."
Syntax syntax; /// E.g. "position(haystack, needle)"
Arguments arguments; /// E.g. ["haystack — String in which the search is performed. String.", "needle — Substring to be searched. String."]
ReturnedValue returned_value; /// E.g. "Starting position in bytes and counting from 1, if the substring was found."
Examples examples; ///
Categories categories; /// E.g. {"String Search"}
std::string argumentsAsString() const;
std::string examplesAsString() const;
std::string categoriesAsString() const;
};

View File

@ -386,8 +386,6 @@ protected:
FallbackSearcher fallback_searcher;
public:
using Searcher = FallbackSearcher;
/** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
* If you specify it small enough, the fallback algorithm will be used,
* since it is considered that it's useless to waste time initializing the hash table.
@ -729,7 +727,7 @@ public:
using Volnitsky = VolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
using VolnitskyUTF8 = VolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>; /// exactly same as Volnitsky
using VolnitskyUTF8 = VolnitskyBase<true, false, UTF8CaseSensitiveStringSearcher>;
using VolnitskyCaseInsensitive = VolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>; /// ignores non-ASCII bytes
using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;
@ -737,7 +735,7 @@ using VolnitskyCaseSensitiveToken = VolnitskyBase<true, true, ASCIICaseSensitive
using VolnitskyCaseInsensitiveToken = VolnitskyBase<false, true, ASCIICaseInsensitiveTokenSearcher>;
using MultiVolnitsky = MultiVolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
using MultiVolnitskyUTF8 = MultiVolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>;
using MultiVolnitskyUTF8 = MultiVolnitskyBase<true, false, UTF8CaseSensitiveStringSearcher>;
using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>;
using MultiVolnitskyCaseInsensitiveUTF8 = MultiVolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;

View File

@ -19,18 +19,27 @@ namespace ErrorCodes
class RandomFaultInjection
{
public:
bool must_fail_after_op = false;
bool must_fail_before_op = false;
RandomFaultInjection(double probability, UInt64 seed_) : rndgen(seed_), distribution(probability) { }
void beforeOperation()
{
if (distribution(rndgen))
if (distribution(rndgen) || must_fail_before_op)
{
must_fail_before_op = false;
throw zkutil::KeeperException("Fault injection before operation", Coordination::Error::ZSESSIONEXPIRED);
}
}
void afterOperation()
{
if (distribution(rndgen))
if (distribution(rndgen) || must_fail_after_op)
{
must_fail_after_op = false;
throw zkutil::KeeperException("Fault injection after operation", Coordination::Error::ZOPERATIONTIMEOUT);
}
}
private:
std::mt19937_64 rndgen;
@ -42,6 +51,9 @@ private:
///
class ZooKeeperWithFaultInjection
{
template<bool async_insert>
friend class ReplicatedMergeTreeSinkImpl;
using zk = zkutil::ZooKeeper;
zk::Ptr keeper;
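The `must_fail_*` flags give `ReplicatedMergeTreeSinkImpl` a deterministic failure hook; from SQL, the probabilistic injection is driven by the `insert_keeper_fault_injection_*` settings (a sketch, assuming a ReplicatedMergeTree table `t`):

```sql
INSERT INTO t
SETTINGS insert_keeper_fault_injection_probability = 0.05,
         insert_keeper_fault_injection_seed = 42
VALUES (1);
```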

View File

@ -42,7 +42,7 @@ namespace ErrorCodes
#define __NR_renameat2 316
#elif defined(__aarch64__)
#define __NR_renameat2 276
#elif defined(__ppc64__)
#elif defined(__powerpc64__)
#define __NR_renameat2 357
#elif defined(__riscv)
#define __NR_renameat2 276

View File

@ -57,4 +57,5 @@
#cmakedefine01 USE_SKIM
#cmakedefine01 USE_OPENSSL_INTREE
#cmakedefine01 USE_ULID
#cmakedefine01 FIU_ENABLE
#cmakedefine01 USE_BCRYPT

View File

@ -17,18 +17,18 @@ namespace Format
{
using IndexPositions = PODArrayWithStackMemory<UInt64, 64>;
static inline void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res, UInt64 argument_number)
static inline UInt64 parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 argument_number)
{
res = 0;
UInt64 res = 0;
for (UInt64 pos = l; pos < r; ++pos)
{
if (!isNumericASCII(description[pos]))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a number in curly braces at position {}", std::to_string(pos));
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a number in curly braces at position {}", pos);
res = res * 10 + description[pos] - '0';
if (res >= argument_number)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Too big number for arguments, must be at most {}",
argument_number - 1);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Too big number for arguments, must be at most {}", argument_number - 1);
}
return res;
}
static inline void init(
@ -132,8 +132,7 @@ namespace Format
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot switch from automatic field numbering to manual field specification");
is_plain_numbering = false;
UInt64 arg;
parseNumber(pattern, last_open, i, arg, argument_number);
UInt64 arg = parseNumber(pattern, last_open, i, argument_number);
if (arg >= argument_number)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument is too big for formatting. Note that indexing starts from zero");

View File

@ -56,8 +56,17 @@ static bool check2()
{
ThreadPool pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, 2);
try
{
pool.scheduleOrThrowOnError([&]{ throw std::runtime_error("Hello, world!"); });
pool.scheduleOrThrowOnError([]{});
}
catch (const std::runtime_error &)
{
/// Sometimes exception may be thrown from schedule.
/// Just retry test in that case.
return true;
}
try
{

View File

@ -41,7 +41,7 @@ enum PollPidResult
#define SYS_pidfd_open 434
#elif defined(__aarch64__)
#define SYS_pidfd_open 434
#elif defined(__ppc64__)
#elif defined(__powerpc64__)
#define SYS_pidfd_open 434
#elif defined(__riscv)
#define SYS_pidfd_open 434

View File

@ -501,9 +501,11 @@ void BaseSettings<TTraits>::read(ReadBuffer & in, SettingsWriteFormat format)
const auto & accessor = Traits::Accessor::instance();
while (true)
{
String name = BaseSettingsHelpers::readString(in);
if (name.empty() /* empty string is a marker of the end of settings */)
String read_name = BaseSettingsHelpers::readString(in);
if (read_name.empty() /* empty string is a marker of the end of settings */)
break;
std::string_view name = TTraits::resolveName(read_name);
size_t index = accessor.find(name);
using Flags = BaseSettingsHelpers::Flags;

View File

@ -31,7 +31,7 @@ namespace Authentication
static const size_t SCRAMBLE_LENGTH = 20;
/** Generate a random string using ASCII characters but avoid separator character,
* produce pseudo random numbers between with about 7 bit worth of entropty between 1-127.
* produce pseudo random numbers with about 7 bits worth of entropy, between 1-127.
* https://github.com/mysql/mysql-server/blob/8.0/mysys/crypt_genhash_impl.cc#L427
*/
static String generateScramble()

View File

@ -560,6 +560,7 @@ class IColumn;
M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
M(Bool, optimize_use_projections, true, "Automatically choose projections to perform SELECT query", 0) \
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
M(Bool, async_query_sending_for_remote, true, "Asynchronously create connections and send query to shards in remote query", 0) \
@ -641,7 +642,7 @@ class IColumn;
M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \
\
M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \
M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \
M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, io_uring, pread_threadpool. The 'io_uring' method is experimental and does not work for Log, TinyLog, StripeLog, File, Set and Join, and other tables with append-able files in presence of concurrent reads and writes.", 0) \
M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \
M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \
@ -715,26 +716,12 @@ class IColumn;
M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. This setting is used for testing purposes and not meant to be changed by users.", 0) \
\
M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \
\
/** Experimental functions */ \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
M(String, ann_index_select_query_params, "", "Parameters passed to ANN indexes in SELECT queries, the format is 'param1=x, param2=y, ...'", 0) \
M(UInt64, max_limit_for_ann_queries, 1000000, "Maximum limit value for using ANN indexes is used to prevent memory overflow in search queries for indexes", 0) \
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
M(Bool, count_distinct_optimization, false, "Rewrite count distinct to subquery of group by", 0) \
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \
M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \
M(Bool, throw_if_no_data_to_insert, true, "Enables or disables empty INSERTs, enabled by default", 0) \
M(Bool, compatibility_ignore_auto_increment_in_create_table, false, "Ignore AUTO_INCREMENT keyword in column declaration if true, otherwise return error. It simplifies migration from MySQL", 0) \
M(Bool, multiple_joins_try_to_keep_original_names, false, "Do not add aliases to top level expression list on multiple joins rewrite", 0) \
M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
M(Bool, optimize_distinct_in_order, false, "This optimization has a bug and it is disabled. Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
M(Bool, optimize_sorting_by_input_stream_properties, true, "Optimize sorting by sorting properties of input stream", 0) \
M(UInt64, insert_keeper_max_retries, 20, "Max retries for keeper operations during insert", 0) \
M(UInt64, insert_keeper_retry_initial_backoff_ms, 100, "Initial backoff timeout for keeper operations during insert", 0) \
@ -743,10 +730,24 @@ class IColumn;
M(UInt64, insert_keeper_fault_injection_seed, 0, "0 - random seed, otherwise the setting value", 0) \
M(Bool, force_aggregation_in_order, false, "Force use of aggregation in order on remote nodes during distributed aggregation. PLEASE, NEVER CHANGE THIS SETTING VALUE MANUALLY!", IMPORTANT) \
M(UInt64, http_max_request_param_data_size, 10_MiB, "Limit on size of request data used as a query parameter in predefined HTTP requests.", 0) \
M(Bool, function_json_value_return_type_allow_nullable, false, "Allow function JSON_VALUE to return nullable type.", 0) \
M(Bool, function_json_value_return_type_allow_complex, false, "Allow function JSON_VALUE to return complex type, such as: struct, array, map.", 0) \
\
/** Experimental functions */ \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
M(UInt64, max_limit_for_ann_queries, 1000000, "Maximum limit value for using ANN indexes is used to prevent memory overflow in search queries for indexes", 0) \
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \
M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \
M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
M(Bool, function_json_value_return_type_allow_nullable, false, "Allow function to return nullable type.", 0) \
M(Bool, function_json_value_return_type_allow_complex, false, "Allow function to return complex type, such as: struct, array, map.", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
@ -972,7 +973,6 @@ class IColumn;
M(Bool, output_format_bson_string_as_string, false, "Use BSON String type instead of Binary for String columns.", 0) \
M(Bool, input_format_bson_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format BSON.", 0) \
\
M(Bool, regexp_dict_allow_other_sources, false, "Allow regexp_tree dictionary to use sources other than yaml source.", 0) \
M(Bool, format_display_secrets_in_show_and_select, false, "Do not hide secrets in SHOW and SELECT queries.", IMPORTANT) \
M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
\
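The relocated `function_json_value_return_type_*` settings gate what `JSON_VALUE` may return; a sketch (exact results depend on the JSON input):

```sql
SET function_json_value_return_type_allow_nullable = 1;
SELECT JSON_VALUE('{"hello": null}', '$.hello');          -- may now return NULL

SET function_json_value_return_type_allow_complex = 1;
SELECT JSON_VALUE('{"hello": {"world": 1}}', '$.hello');  -- may now return the nested object
```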

View File

@ -32,9 +32,9 @@ void SerializationArray::serializeBinary(const Field & field, WriteBuffer & ostr
{
const Array & a = field.get<const Array &>();
writeVarUInt(a.size(), ostr);
for (size_t i = 0; i < a.size(); ++i)
for (const auto & i : a)
{
nested->serializeBinary(a[i], ostr, settings);
nested->serializeBinary(i, ostr, settings);
}
}

View File

@ -246,7 +246,8 @@ void SerializationInfoByName::writeJSON(WriteBuffer & out) const
return writeString(oss.str(), out);
}
void SerializationInfoByName::readJSON(ReadBuffer & in)
SerializationInfoByName SerializationInfoByName::readJSON(
const NamesAndTypesList & columns, const Settings & settings, ReadBuffer & in)
{
String json_str;
readString(json_str, in);
@ -262,8 +263,13 @@ void SerializationInfoByName::readJSON(ReadBuffer & in)
"Unknown version of serialization infos ({}). Should be less or equal than {}",
object->getValue<size_t>(KEY_VERSION), SERIALIZATION_INFO_VERSION);
SerializationInfoByName infos;
if (object->has(KEY_COLUMNS))
{
std::unordered_map<std::string_view, const IDataType *> column_type_by_name;
for (const auto & [name, type] : columns)
column_type_by_name.emplace(name, type.get());
auto array = object->getArray(KEY_COLUMNS);
for (const auto & elem : *array)
{
@ -271,13 +277,22 @@ void SerializationInfoByName::readJSON(ReadBuffer & in)
if (!elem_object->has(KEY_NAME))
throw Exception(ErrorCodes::CORRUPTED_DATA,
"Missed field '{}' in SerializationInfo of columns", KEY_NAME);
"Missed field '{}' in serialization infos", KEY_NAME);
auto name = elem_object->getValue<String>(KEY_NAME);
if (auto it = find(name); it != end())
it->second->fromJSON(*elem_object);
}
auto it = column_type_by_name.find(name);
if (it == column_type_by_name.end())
throw Exception(ErrorCodes::CORRUPTED_DATA,
"Found unexpected column '{}' in serialization infos", name);
auto info = it->second->createSerializationInfo(settings);
info->fromJSON(*elem_object);
infos.emplace(name, std::move(info));
}
}
return infos;
}
}

View File

@ -96,8 +96,10 @@ using MutableSerializationInfos = std::vector<MutableSerializationInfoPtr>;
class SerializationInfoByName : public std::map<String, MutableSerializationInfoPtr>
{
public:
using Settings = SerializationInfo::Settings;
SerializationInfoByName() = default;
SerializationInfoByName(const NamesAndTypesList & columns, const SerializationInfo::Settings & settings);
SerializationInfoByName(const NamesAndTypesList & columns, const Settings & settings);
void add(const Block & block);
void add(const SerializationInfoByName & other);
@ -108,7 +110,9 @@ public:
void replaceData(const SerializationInfoByName & other);
void writeJSON(WriteBuffer & out) const;
void readJSON(ReadBuffer & in);
static SerializationInfoByName readJSON(
const NamesAndTypesList & columns, const Settings & settings, ReadBuffer & in);
};
}

View File

@ -67,7 +67,7 @@ DatabaseMySQL::DatabaseMySQL(
try
{
/// Test that the database is working fine; it will also fetch tables.
empty();
empty(); // NOLINT(bugprone-standalone-empty)
}
catch (...)
{

View File

@ -71,11 +71,11 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(
: update_time{std::chrono::system_clock::from_time_t(0)}
, dict_struct{dict_struct_}
, configuration{configuration_}
, query_builder{dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks}
, query_builder(std::make_shared<ExternalQueryBuilder>(dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks))
, sample_block{sample_block_}
, context(context_)
, pool{createPool(configuration)}
, load_all_query{query_builder.composeLoadAllQuery()}
, load_all_query{query_builder->composeLoadAllQuery()}
{
}
@ -84,7 +84,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionar
, dict_struct{other.dict_struct}
, configuration{other.configuration}
, invalidate_query_response{other.invalidate_query_response}
, query_builder{dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks}
, query_builder(std::make_shared<ExternalQueryBuilder>(dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks))
, sample_block{other.sample_block}
, context(Context::createCopy(other.context))
, pool{createPool(configuration)}
@ -99,12 +99,12 @@ std::string ClickHouseDictionarySource::getUpdateFieldAndDate()
time_t hr_time = std::chrono::system_clock::to_time_t(update_time) - configuration.update_lag;
std::string str_time = DateLUT::instance().timeToString(hr_time);
update_time = std::chrono::system_clock::now();
return query_builder.composeUpdateQuery(configuration.update_field, str_time);
return query_builder->composeUpdateQuery(configuration.update_field, str_time);
}
else
{
update_time = std::chrono::system_clock::now();
return query_builder.composeLoadAllQuery();
return query_builder->composeLoadAllQuery();
}
}
@ -121,13 +121,13 @@ QueryPipeline ClickHouseDictionarySource::loadUpdatedAll()
QueryPipeline ClickHouseDictionarySource::loadIds(const std::vector<UInt64> & ids)
{
return createStreamForQuery(query_builder.composeLoadIdsQuery(ids));
return createStreamForQuery(query_builder->composeLoadIdsQuery(ids));
}
QueryPipeline ClickHouseDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
{
String query = query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES);
String query = query_builder->composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES);
return createStreamForQuery(query);
}

View File

@ -78,11 +78,11 @@ private:
const DictionaryStructure dict_struct;
const Configuration configuration;
mutable std::string invalidate_query_response;
ExternalQueryBuilder query_builder;
ExternalQueryBuilderPtr query_builder;
Block sample_block;
ContextMutablePtr context;
ConnectionPoolWithFailoverPtr pool;
const std::string load_all_query;
std::string load_all_query;
Poco::Logger * log = &Poco::Logger::get("ClickHouseDictionarySource");
/// RegExpTreeDictionary is the only dictionary whose structure of attributes differs from the input block.

View File

@ -36,6 +36,10 @@ struct ExternalQueryBuilder
const std::string & where_,
IdentifierQuotingStyle quoting_style_);
ExternalQueryBuilder(const ExternalQueryBuilder &) = default;
virtual ~ExternalQueryBuilder() = default;
/** Generate a query to load all data. */
std::string composeLoadAllQuery() const;
@ -61,10 +65,10 @@ struct ExternalQueryBuilder
std::string composeLoadKeysQuery(const Columns & key_columns, const std::vector<size_t> & requested_rows, LoadKeysMethod method, size_t partition_key_prefix = 0) const;
private:
protected:
const FormatSettings format_settings = {};
void composeLoadAllQuery(WriteBuffer & out) const;
virtual void composeLoadAllQuery(WriteBuffer & out) const;
/// In the following methods `beg` and `end` specifies which columns to write in expression
@ -93,4 +97,6 @@ private:
void writeQuoted(const std::string & s, WriteBuffer & out) const;
};
using ExternalQueryBuilderPtr = std::shared_ptr<ExternalQueryBuilder>;
}

View File

@ -20,6 +20,7 @@
#include <Functions/Regexps.h>
#include <Functions/checkHyperscanRegexp.h>
#include <QueryPipeline/QueryPipeline.h>
#include <Processors/Sources/BlocksListSource.h>
#include <Dictionaries/ClickHouseDictionarySource.h>
#include <Dictionaries/DictionaryFactory.h>
@ -86,6 +87,32 @@ namespace
}
}
struct ExternalRegexpQueryBuilder final : public ExternalQueryBuilder
{
explicit ExternalRegexpQueryBuilder(const ExternalQueryBuilder & builder) : ExternalQueryBuilder(builder) {}
void composeLoadAllQuery(WriteBuffer & out) const override
{
writeString("SELECT id, parent_id, regexp, keys, values FROM ", out);
if (!db.empty())
{
writeQuoted(db, out);
writeChar('.', out);
}
if (!schema.empty())
{
writeQuoted(schema, out);
writeChar('.', out);
}
writeQuoted(table, out);
if (!where.empty())
{
writeString(" WHERE ", out);
writeString(where, out);
}
}
};
struct RegExpTreeDictionary::RegexTreeNode
{
std::vector<UInt64> children;
@ -117,6 +144,7 @@ struct RegExpTreeDictionary::RegexTreeNode
{
Field field;
std::vector<StringPiece> pieces;
String original_value;
constexpr bool containsBackRefs() const { return !pieces.empty(); }
};
@ -208,12 +236,12 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
auto string_pieces = createStringPieces(value, num_captures, regex, logger);
if (!string_pieces.empty())
{
node->attributes[name_] = RegexTreeNode::AttributeValue{.field = values[j], .pieces = std::move(string_pieces)};
node->attributes[name_] = RegexTreeNode::AttributeValue{.field = values[j], .pieces = std::move(string_pieces), .original_value = value};
}
else
{
Field field = parseStringToField(values[j].safeGet<String>(), attr.type);
node->attributes[name_] = RegexTreeNode::AttributeValue{.field = std::move(field)};
Field field = parseStringToField(value, attr.type);
node->attributes[name_] = RegexTreeNode::AttributeValue{.field = std::move(field), .original_value = value};
}
}
}
@ -383,6 +411,8 @@ RegExpTreeDictionary::RegExpTreeDictionary(
sample_block.insert(ColumnWithTypeAndName(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), kKeys));
sample_block.insert(ColumnWithTypeAndName(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), kValues));
ch_source->sample_block = std::move(sample_block);
ch_source->query_builder = std::make_shared<ExternalRegexpQueryBuilder>(*ch_source->query_builder);
ch_source->load_all_query = ch_source->query_builder->composeLoadAllQuery();
}
loadData();
@ -651,6 +681,52 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::match(
return result;
}
Pipe RegExpTreeDictionary::read(const Names & , size_t max_block_size, size_t) const
{
auto it = regex_nodes.begin();
size_t block_size = 0;
BlocksList result;
for (;;)
{
Block block;
auto col_id = std::make_shared<DataTypeUInt64>()->createColumn();
auto col_pid = std::make_shared<DataTypeUInt64>()->createColumn();
auto col_regex = std::make_shared<DataTypeString>()->createColumn();
auto col_keys = std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())->createColumn();
auto col_values = std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())->createColumn();
for (;it != regex_nodes.end() && block_size < max_block_size; it++, block_size++)
{
col_id->insert(it->first);
const auto & node = it->second;
col_pid->insert(node->parent_id);
col_regex->insert(node->regex);
std::vector<Field> keys, values;
for (const auto & [key, attr] : node->attributes)
{
keys.push_back(key);
values.push_back(attr.original_value);
}
col_keys->insert(Array(keys.begin(), keys.end()));
col_values->insert(Array(values.begin(), values.end()));
}
block.insert(ColumnWithTypeAndName(std::move(col_id),std::make_shared<DataTypeUInt64>(),kId));
block.insert(ColumnWithTypeAndName(std::move(col_pid),std::make_shared<DataTypeUInt64>(),kParentId));
block.insert(ColumnWithTypeAndName(std::move(col_regex),std::make_shared<DataTypeString>(),kRegExp));
block.insert(ColumnWithTypeAndName(std::move(col_keys),std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()),kKeys));
block.insert(ColumnWithTypeAndName(std::move(col_values),std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()),kValues));
result.push_back(std::move(block));
if (it == regex_nodes.end())
break;
block_size = 0;
}
return Pipe(std::make_shared<BlocksListSource>(std::move(result)));
}
Columns RegExpTreeDictionary::getColumns(
const Strings & attribute_names,
const DataTypes & result_types,
@ -717,10 +793,6 @@ void registerDictionaryRegExpTree(DictionaryFactory & factory)
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
if (!context->getSettings().regexp_dict_allow_other_sources && typeid_cast<YAMLRegExpTreeDictionarySource *>(source_ptr.get()) == nullptr)
throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION,
"regexp_tree dictionary doesn't accept sources other than yaml source. "
"To active it, please set regexp_dict_allow_other_sources=true");
return std::make_unique<RegExpTreeDictionary>(dict_id, dict_struct, std::move(source_ptr), configuration, context->getSettings().regexp_dict_allow_hyperscan);
};

View File

@ -22,6 +22,8 @@
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/IDictionary.h>
#include <Storages/ColumnsDescription.h>
namespace DB
{
@ -91,10 +93,7 @@ public:
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Dictionary {} does not support method `hasKeys`", name);
}
Pipe read(const Names &, size_t, size_t) const override
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Dictionary {} does not support method `read`", name);
}
Pipe read(const Names & columns, size_t max_block_size, size_t num_streams) const override;
ColumnPtr getColumn(
const std::string & attribute_name,

View File

@ -50,7 +50,7 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile(
bool use_external_buffer_,
std::optional<size_t> read_until_position_,
std::shared_ptr<FilesystemCacheLog> cache_log_)
: ReadBufferFromFileBase(settings_.remote_fs_buffer_size, nullptr, 0, file_size_)
: ReadBufferFromFileBase(use_external_buffer_ ? 0 : settings_.remote_fs_buffer_size, nullptr, 0, file_size_)
#ifndef NDEBUG
, log(&Poco::Logger::get("CachedOnDiskReadBufferFromFile(" + source_file_path_ + ")"))
#else
@ -151,10 +151,8 @@ CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segm
/// Do not allow using the asynchronous version of LocalFSReadMethod.
local_read_settings.local_fs_method = LocalFSReadMethod::pread;
// The buffer will unnecessarily allocate a Memory of size local_fs_buffer_size, which will then
// most likely be unused because we're swap()ping our own internal_buffer into
// implementation_buffer before each read. But we can't just set local_fs_buffer_size = 0 here
// because some buffer implementations actually use that memory (e.g. for prefetching).
if (use_external_buffer)
local_read_settings.local_fs_buffer_size = 0;
auto buf = createReadBufferFromFileBase(path, local_read_settings);
@ -389,14 +387,6 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme
auto read_buffer_for_file_segment = getReadBufferForFileSegment(file_segment);
watch.stop();
current_file_segment_counters.increment(
ProfileEvents::FileSegmentWaitReadBufferMicroseconds, watch.elapsedMicroseconds());
[[maybe_unused]] auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
chassert(download_current_segment == file_segment.isDownloader());
chassert(file_segment.range() == range);
chassert(file_offset_of_buffer_end >= range.left && file_offset_of_buffer_end <= range.right);
LOG_TEST(
log,
@ -406,6 +396,15 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme
read_buffer_for_file_segment->getFileOffsetOfBufferEnd(),
file_segment.getInfoForLog());
current_file_segment_counters.increment(
ProfileEvents::FileSegmentWaitReadBufferMicroseconds, watch.elapsedMicroseconds());
[[maybe_unused]] auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
chassert(download_current_segment == file_segment.isDownloader());
chassert(file_segment.range() == range);
chassert(file_offset_of_buffer_end >= range.left && file_offset_of_buffer_end <= range.right);
read_buffer_for_file_segment->setReadUntilPosition(range.right + 1); /// [..., range.right]
switch (read_type)
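The constructor change at the top of this hunk passes a zero size to ReadBufferFromFileBase when use_external_buffer_ is set: the caller supplies its own memory and swaps it in before every read, so an internal buffer would be allocated only to sit unused. The same reasoning drives the local_fs_buffer_size = 0 branch above. A minimal sketch of the pattern, with hypothetical names:

#include <cstddef>
#include <vector>

// Hypothetical simplified reader: when the caller promises to provide an
// external buffer, the internal one is sized to zero instead of allocated.
class Reader
{
public:
    Reader(size_t buffer_size, bool use_external_buffer)
        : internal_buffer(use_external_buffer ? 0 : buffer_size)
    {
    }

    size_t internalBufferSize() const { return internal_buffer.size(); }

private:
    std::vector<char> internal_buffer;
};

int main()
{
    Reader owns_memory(1 << 20, /*use_external_buffer=*/ false);   // allocates 1 MiB
    Reader borrows_memory(1 << 20, /*use_external_buffer=*/ true); // allocates nothing
    return owns_memory.internalBufferSize() == (1 << 20)
        && borrows_memory.internalBufferSize() == 0 ? 0 : 1;
}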

View File

@ -32,7 +32,7 @@
#define SYS_preadv2 327
#elif defined(__aarch64__)
#define SYS_preadv2 286
#elif defined(__ppc64__)
#elif defined(__powerpc64__)
#define SYS_preadv2 380
#elif defined(__riscv)
#define SYS_preadv2 286
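The change above switches the PowerPC guard from __ppc64__ to __powerpc64__, presumably because GCC and Clang on Linux reliably predefine __powerpc64__ while __ppc64__ is not guaranteed there. A sketch of the guard shape; the 380 (powerpc64) and 286 (riscv) values mirror the hunk above, and the fallback is illustrative only:

#include <cstdio>

#if defined(__powerpc64__)
#    define SYS_PREADV2_SKETCH 380
#elif defined(__riscv)
#    define SYS_PREADV2_SKETCH 286
#else
#    define SYS_PREADV2_SKETCH (-1) // unknown in this sketch; real code covers more arches
#endif

int main()
{
    std::printf("preadv2 syscall number (sketch): %d\n", SYS_PREADV2_SKETCH);
}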

View File

@ -29,7 +29,7 @@ const String & getFunctionCanonicalNameIfAny(const String & name)
void FunctionFactory::registerFunction(
const std::string & name,
FunctionCreator creator,
Documentation doc,
FunctionDocumentation doc,
CaseSensitiveness case_sensitiveness)
{
if (!functions.emplace(name, FunctionFactoryData{creator, doc}).second)
@ -141,7 +141,7 @@ FunctionFactory & FunctionFactory::instance()
return ret;
}
Documentation FunctionFactory::getDocumentation(const std::string & name) const
FunctionDocumentation FunctionFactory::getDocumentation(const std::string & name) const
{
auto it = functions.find(name);
if (it == functions.end())

View File

@ -3,7 +3,7 @@
#include <Interpreters/Context_fwd.h>
#include <Common/register_objects.h>
#include <Common/IFactoryWithAliases.h>
#include <Common/Documentation.h>
#include <Common/FunctionDocumentation.h>
#include <Functions/IFunction.h>
#include <Functions/IFunctionAdaptors.h>
@ -17,7 +17,7 @@ namespace DB
{
using FunctionCreator = std::function<FunctionOverloadResolverPtr(ContextPtr)>;
using FunctionFactoryData = std::pair<FunctionCreator, Documentation>;
using FunctionFactoryData = std::pair<FunctionCreator, FunctionDocumentation>;
/** Creates function by name.
* Function could use for initialization (take ownership of shared_ptr, for example)
@ -29,13 +29,13 @@ public:
static FunctionFactory & instance();
template <typename Function>
void registerFunction(Documentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive)
void registerFunction(FunctionDocumentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive)
{
registerFunction<Function>(Function::name, std::move(doc), case_sensitiveness);
}
template <typename Function>
void registerFunction(const std::string & name, Documentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive)
void registerFunction(const std::string & name, FunctionDocumentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive)
{
if constexpr (std::is_base_of_v<IFunction, Function>)
registerFunction(name, &adaptFunctionToOverloadResolver<Function>, std::move(doc), case_sensitiveness);
@ -63,10 +63,10 @@ public:
void registerFunction(
const std::string & name,
FunctionCreator creator,
Documentation doc = {},
FunctionDocumentation doc = {},
CaseSensitiveness case_sensitiveness = CaseSensitive);
Documentation getDocumentation(const std::string & name) const;
FunctionDocumentation getDocumentation(const std::string & name) const;
private:
using Functions = std::unordered_map<std::string, Value>;
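The signature changes above are the header side of a mechanical rename from Documentation to FunctionDocumentation. The call sites throughout this diff also move from positional aggregate initialization to C++20 designated initializers, which makes each registration self-describing and lets unused fields be omitted. A sketch with a hypothetical cut-down aggregate:

#include <string>
#include <vector>

// Hypothetical cut-down aggregate mirroring the shape used in this diff.
struct FunctionDocumentationSketch
{
    std::string description;
    std::vector<std::string> examples;
    std::vector<std::string> categories;
};

int main()
{
    // Positional aggregate init (old style): the reader must remember the
    // field order, and a middle field cannot be skipped.
    FunctionDocumentationSketch old_style{"Returns X.", {"SELECT x()"}, {"Misc"}};

    // Designated initializers (new style): self-describing, and fields such
    // as examples can simply be left out.
    FunctionDocumentationSketch new_style{
        .description = "Returns X.",
        .categories = {"Misc"},
    };

    return old_style.description == new_style.description ? 0 : 1;
}

Designated initializers must follow the declaration order of the fields, which is why the converted call sites always write .description before .examples and .categories.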

View File

@ -8,14 +8,14 @@ namespace DB
REGISTER_FUNCTION(ToDecimalString)
{
factory.registerFunction<FunctionToDecimalString>(
{
R"(
FunctionDocumentation{
.description=R"(
Returns a string representation of a number. The first argument is a number of any numeric type,
the second argument is the desired number of digits in the fractional part. Returns String.
)",
Documentation::Examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)"}},
Documentation::Categories{"String"}
.examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)", ""}},
.categories{"String"}
}, FunctionFactory::CaseInsensitive);
}

View File

@ -169,17 +169,17 @@ public:
REGISTER_FUNCTION(ULIDStringToDateTime)
{
factory.registerFunction<FunctionULIDStringToDateTime>(
factory.registerFunction<FunctionULIDStringToDateTime>(FunctionDocumentation
{
R"(
.description=R"(
This function extracts the timestamp from a ULID and returns it as a DateTime64(3) typed value.
The function expects the ULID to be provided as the first argument, which can be either a String or a FixedString(26) data type.
An optional second argument can be passed to specify a timezone for the timestamp.
)",
Documentation::Examples{
{"ulid", "SELECT ULIDStringToDateTime(generateULID())"},
{"timezone", "SELECT ULIDStringToDateTime(generateULID(), 'Asia/Istanbul')"}},
Documentation::Categories{"ULID"}
.examples{
{"ulid", "SELECT ULIDStringToDateTime(generateULID())", ""},
{"timezone", "SELECT ULIDStringToDateTime(generateULID(), 'Asia/Istanbul')", ""}},
.categories{"ULID"}
},
FunctionFactory::CaseSensitive);
}

View File

@ -47,69 +47,69 @@ Returned value: value of the dictionary attribute parsed in the attributes data type.
Throws an exception if it cannot parse the value of the attribute or the value does not match the attribute data type.
)" };
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::get>>(Documentation{ fmt::format(dict_get_description, "attributes data type") });
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::getOrDefault>>(Documentation{ fmt::format(dict_get_or_default_description, "attributes data type") });
factory.registerFunction<FunctionDictGetOrNull>(Documentation{ dict_get_or_null_description });
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::get>>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "attributes data type") });
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::getOrDefault>>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "attributes data type") });
factory.registerFunction<FunctionDictGetOrNull>(FunctionDocumentation{ .description=dict_get_or_null_description });
factory.registerFunction<FunctionDictGetUInt8>(Documentation{ fmt::format(dict_get_description, "UInt8") });
factory.registerFunction<FunctionDictGetUInt16>(Documentation{ fmt::format(dict_get_description, "UInt16") });
factory.registerFunction<FunctionDictGetUInt32>(Documentation{ fmt::format(dict_get_description, "UInt32") });
factory.registerFunction<FunctionDictGetUInt64>(Documentation{ fmt::format(dict_get_description, "UInt64") });
factory.registerFunction<FunctionDictGetInt8>(Documentation{ fmt::format(dict_get_description, "Int8") });
factory.registerFunction<FunctionDictGetInt16>(Documentation{ fmt::format(dict_get_description, "Int16") });
factory.registerFunction<FunctionDictGetInt32>(Documentation{ fmt::format(dict_get_description, "Int32") });
factory.registerFunction<FunctionDictGetInt64>(Documentation{ fmt::format(dict_get_description, "Int64") });
factory.registerFunction<FunctionDictGetFloat32>(Documentation{ fmt::format(dict_get_description, "Float32") });
factory.registerFunction<FunctionDictGetFloat64>(Documentation{ fmt::format(dict_get_description, "Float64") });
factory.registerFunction<FunctionDictGetDate>(Documentation{ fmt::format(dict_get_description, "Date") });
factory.registerFunction<FunctionDictGetDateTime>(Documentation{ fmt::format(dict_get_description, "DateTime") });
factory.registerFunction<FunctionDictGetUUID>(Documentation{ fmt::format(dict_get_description, "UUID") });
factory.registerFunction<FunctionDictGetIPv4>(Documentation{ fmt::format(dict_get_description, "IPv4") });
factory.registerFunction<FunctionDictGetIPv6>(Documentation{ fmt::format(dict_get_description, "IPv6") });
factory.registerFunction<FunctionDictGetString>(Documentation{ fmt::format(dict_get_description, "String") });
factory.registerFunction<FunctionDictGetUInt8>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "UInt8") });
factory.registerFunction<FunctionDictGetUInt16>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "UInt16") });
factory.registerFunction<FunctionDictGetUInt32>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "UInt32") });
factory.registerFunction<FunctionDictGetUInt64>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "UInt64") });
factory.registerFunction<FunctionDictGetInt8>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Int8") });
factory.registerFunction<FunctionDictGetInt16>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Int16") });
factory.registerFunction<FunctionDictGetInt32>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Int32") });
factory.registerFunction<FunctionDictGetInt64>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Int64") });
factory.registerFunction<FunctionDictGetFloat32>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Float32") });
factory.registerFunction<FunctionDictGetFloat64>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Float64") });
factory.registerFunction<FunctionDictGetDate>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "Date") });
factory.registerFunction<FunctionDictGetDateTime>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "DateTime") });
factory.registerFunction<FunctionDictGetUUID>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "UUID") });
factory.registerFunction<FunctionDictGetIPv4>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "IPv4") });
factory.registerFunction<FunctionDictGetIPv6>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "IPv6") });
factory.registerFunction<FunctionDictGetString>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "String") });
factory.registerFunction<FunctionDictGetUInt8OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "UInt8") });
factory.registerFunction<FunctionDictGetUInt16OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "UInt16") });
factory.registerFunction<FunctionDictGetUInt32OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "UInt32") });
factory.registerFunction<FunctionDictGetUInt64OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "UInt64") });
factory.registerFunction<FunctionDictGetInt8OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Int8") });
factory.registerFunction<FunctionDictGetInt16OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Int16") });
factory.registerFunction<FunctionDictGetInt32OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Int32") });
factory.registerFunction<FunctionDictGetInt64OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Int64") });
factory.registerFunction<FunctionDictGetFloat32OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Float32") });
factory.registerFunction<FunctionDictGetFloat64OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Float64") });
factory.registerFunction<FunctionDictGetDateOrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "Date") });
factory.registerFunction<FunctionDictGetDateTimeOrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "DateTime") });
factory.registerFunction<FunctionDictGetUUIDOrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "UUID") });
factory.registerFunction<FunctionDictGetIPv4OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "IPv4") });
factory.registerFunction<FunctionDictGetIPv6OrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "IPv6") });
factory.registerFunction<FunctionDictGetStringOrDefault>(Documentation{ fmt::format(dict_get_or_default_description, "String") });
factory.registerFunction<FunctionDictGetUInt8OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "UInt8") });
factory.registerFunction<FunctionDictGetUInt16OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "UInt16") });
factory.registerFunction<FunctionDictGetUInt32OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "UInt32") });
factory.registerFunction<FunctionDictGetUInt64OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "UInt64") });
factory.registerFunction<FunctionDictGetInt8OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Int8") });
factory.registerFunction<FunctionDictGetInt16OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Int16") });
factory.registerFunction<FunctionDictGetInt32OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Int32") });
factory.registerFunction<FunctionDictGetInt64OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Int64") });
factory.registerFunction<FunctionDictGetFloat32OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Float32") });
factory.registerFunction<FunctionDictGetFloat64OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Float64") });
factory.registerFunction<FunctionDictGetDateOrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "Date") });
factory.registerFunction<FunctionDictGetDateTimeOrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "DateTime") });
factory.registerFunction<FunctionDictGetUUIDOrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "UUID") });
factory.registerFunction<FunctionDictGetIPv4OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "IPv4") });
factory.registerFunction<FunctionDictGetIPv6OrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "IPv6") });
factory.registerFunction<FunctionDictGetStringOrDefault>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "String") });
factory.registerFunction<FunctionDictHas>(Documentation{ R"(
factory.registerFunction<FunctionDictHas>(FunctionDocumentation{ .description=R"(
Checks whether a key is present in a dictionary.
Accepts 2 parameters: name of the dictionary, key value - expression returning dictionary key-type value or tuple-type value - depending on the dictionary configuration.
Returned value: 0 if there is no key, 1 if there is a key, type of UInt8
)"});
factory.registerFunction<FunctionDictGetHierarchy>(Documentation{ R"(
factory.registerFunction<FunctionDictGetHierarchy>(FunctionDocumentation{ .description=R"(
Creates an array, containing all the parents of a key in the hierarchical dictionary.
Accepts 2 parameters: name of the dictionary, key value - expression returning a UInt64-type value.
Returned value: parents for the key, type of Array(UInt64)
)"});
factory.registerFunction<FunctionDictIsIn>(Documentation{ R"(
factory.registerFunction<FunctionDictIsIn>(FunctionDocumentation{ .description=R"(
Checks the ancestor of a key through the whole hierarchical chain in the dictionary.
Accepts 3 parameters: name of the dictionary, key to be checked - expression returning a UInt64-type value, alleged ancestor of the key - expression returning a UInt64-type value.
Returned value: 0 if key is not a child of the ancestor, 1 if key is a child of the ancestor or if key is the ancestor, type of UInt8
)"});
factory.registerFunction<FunctionDictGetChildrenOverloadResolver>(Documentation{ R"(
factory.registerFunction<FunctionDictGetChildrenOverloadResolver>(FunctionDocumentation{ .description=R"(
Returns first-level children as an array of indexes. It is the inverse transformation for dictGetHierarchy.
Accepts 2 parameters: name of the dictionary, key value - expression returning a UInt64-type value.
Returned value: first-level descendants for the key, type of Array(UInt64)
)"});
factory.registerFunction<FunctionDictGetDescendantsOverloadResolver>(Documentation{ R"(
factory.registerFunction<FunctionDictGetDescendantsOverloadResolver>(FunctionDocumentation{ .description=R"(
Returns all descendants as if the dictGetChildren function were applied `level` times recursively.
Accepts 3 parameters: name of the dictionary, key value (expression returning a UInt64-type value), and hierarchy level (UInt8); if level = 0, returns all descendants to the end.
Returned value: descendants for the key, type of Array(UInt64)

View File

@ -15,15 +15,15 @@ REGISTER_FUNCTION(Hashing)
factory.registerFunction<FunctionSipHash64Keyed>();
factory.registerFunction<FunctionSipHash128>();
factory.registerFunction<FunctionSipHash128Keyed>();
factory.registerFunction<FunctionSipHash128Reference>({
"Like [sipHash128](#hash_functions-siphash128) but implements the 128-bit algorithm from the original authors of SipHash.",
Documentation::Examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))"}},
Documentation::Categories{"Hash"}
factory.registerFunction<FunctionSipHash128Reference>(FunctionDocumentation{
.description="Like [sipHash128](#hash_functions-siphash128) but implements the 128-bit algorithm from the original authors of SipHash.",
.examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))", ""}},
.categories{"Hash"}
});
factory.registerFunction<FunctionSipHash128ReferenceKeyed>({
"Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument instead of using a fixed key.",
Documentation::Examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));"}},
Documentation::Categories{"Hash"}
factory.registerFunction<FunctionSipHash128ReferenceKeyed>(FunctionDocumentation{
.description="Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument instead of using a fixed key.",
.examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));", ""}},
.categories{"Hash"}
});
factory.registerFunction<FunctionCityHash64>();
factory.registerFunction<FunctionFarmFingerprint64>();
@ -37,10 +37,10 @@ REGISTER_FUNCTION(Hashing)
factory.registerFunction<FunctionXxHash32>();
factory.registerFunction<FunctionXxHash64>();
factory.registerFunction<FunctionXXH3>(
{
"Calculates value of XXH3 64-bit hash function. Refer to https://github.com/Cyan4973/xxHash for detailed documentation.",
Documentation::Examples{{"hash", "SELECT xxh3('ClickHouse')"}},
Documentation::Categories{"Hash"}
FunctionDocumentation{
.description="Calculates value of XXH3 64-bit hash function. Refer to https://github.com/Cyan4973/xxHash for detailed documentation.",
.examples{{"hash", "SELECT xxh3('ClickHouse')", ""}},
.categories{"Hash"}
},
FunctionFactory::CaseSensitive);
@ -48,16 +48,16 @@ REGISTER_FUNCTION(Hashing)
factory.registerFunction<FunctionBLAKE3>(
{
R"(
FunctionDocumentation{
.description=R"(
Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString.
This cryptographic hash function is integrated into ClickHouse using the BLAKE3 Rust library.
The function is rather fast: roughly twice as fast as SHA-2, while generating hashes of the same length as SHA-256.
It returns a BLAKE3 hash as a byte array with type FixedString(32).
)",
Documentation::Examples{
{"hash", "SELECT hex(BLAKE3('ABC'))"}},
Documentation::Categories{"Hash"}
.examples{
{"hash", "SELECT hex(BLAKE3('ABC'))", ""}},
.categories{"Hash"}
},
FunctionFactory::CaseSensitive);
}

View File

@ -108,6 +108,10 @@ struct NgramDistanceImpl
if constexpr (case_insensitive)
{
#if defined(MEMORY_SANITIZER)
/// Due to PODArray padding, accessing more elements should be OK
__msan_unpoison(code_points + (N - 1), padding_offset * sizeof(CodePoint));
#endif
/// We really need template lambdas with C++20 to do it inline
unrollLowering<N - 1>(code_points, std::make_index_sequence<padding_offset>());
}
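The __msan_unpoison call added above tells MemorySanitizer that reading into PODArray's padding is deliberate: the access stays in-bounds because the container over-allocates, but the padding bytes are uninitialized, so MSan would otherwise report a false positive. A minimal sketch of the idiom, compilable with or without MSan; all names and sizes here are illustrative:

#include <cstddef>
#include <cstdio>
#include <cstring>

#if defined(__has_feature)
#    if __has_feature(memory_sanitizer)
#        include <sanitizer/msan_interface.h>
#        define SKETCH_HAS_MSAN 1
#    endif
#endif

int main()
{
    constexpr size_t payload = 16, padding = 8;
    unsigned char buf[payload + padding];
    std::memset(buf, 1, payload); // only the payload is initialized

#ifdef SKETCH_HAS_MSAN
    // Without this, touching buf[16..23] below would be reported as a
    // use-of-uninitialized-value even though the access is in-bounds.
    __msan_unpoison(buf + payload, padding);
#endif

    unsigned sum = 0;
    for (size_t i = 0; i < payload + padding; ++i) // intentionally reads the padding
        sum += buf[i];
    std::printf("sum over payload plus padding (padding bytes are indeterminate): %u\n", sum);
}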

View File

@ -100,8 +100,8 @@ namespace
REGISTER_FUNCTION(JSONArrayLength)
{
factory.registerFunction<FunctionJSONArrayLength>(Documentation{
"Returns the number of elements in the outermost JSON array. The function returns NULL if input JSON string is invalid."});
factory.registerFunction<FunctionJSONArrayLength>(FunctionDocumentation{
.description="Returns the number of elements in the outermost JSON array. The function returns NULL if input JSON string is invalid."});
/// For Spark compatibility.
factory.registerAlias("JSON_ARRAY_LENGTH", "JSONArrayLength", FunctionFactory::CaseInsensitive);

View File

@ -44,32 +44,32 @@ using FunctionCutToFirstSignificantSubdomainWithWWWRFC = FunctionStringToString<
REGISTER_FUNCTION(CutToFirstSignificantSubdomain)
{
factory.registerFunction<FunctionCutToFirstSignificantSubdomain>(
{
R"(Returns the part of the domain that includes top-level subdomains up to the "first significant subdomain" (see documentation of the `firstSignificantSubdomain`).)",
Documentation::Examples{
{"cutToFirstSignificantSubdomain1", "SELECT cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/')"},
{"cutToFirstSignificantSubdomain2", "SELECT cutToFirstSignificantSubdomain('www.tr')"},
{"cutToFirstSignificantSubdomain3", "SELECT cutToFirstSignificantSubdomain('tr')"},
FunctionDocumentation{
.description=R"(Returns the part of the domain that includes top-level subdomains up to the "first significant subdomain" (see documentation of the `firstSignificantSubdomain`).)",
.examples{
{"cutToFirstSignificantSubdomain1", "SELECT cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/')", ""},
{"cutToFirstSignificantSubdomain2", "SELECT cutToFirstSignificantSubdomain('www.tr')", ""},
{"cutToFirstSignificantSubdomain3", "SELECT cutToFirstSignificantSubdomain('tr')", ""},
},
Documentation::Categories{"URL"}
.categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainWithWWW>(
{
R"(Returns the part of the domain that includes top-level subdomains up to the "first significant subdomain", without stripping "www".)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Returns the part of the domain that includes top-level subdomains up to the "first significant subdomain", without stripping "www".)",
.examples{},
.categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainRFC>(
{
R"(Similar to `cutToFirstSignificantSubdomain` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Similar to `cutToFirstSignificantSubdomain` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
.examples{},
.categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainWithWWWRFC>(
{
R"(Similar to `cutToFirstSignificantSubdomainWithWWW` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Similar to `cutToFirstSignificantSubdomainWithWWW` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
.examples{},
.categories{"URL"}
});
}

View File

@ -43,39 +43,39 @@ using FunctionCutToFirstSignificantSubdomainCustomWithWWWRFC = FunctionCutToFirs
REGISTER_FUNCTION(CutToFirstSignificantSubdomainCustom)
{
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustom>(
{
R"(
FunctionDocumentation{
.description=R"(
Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. Accepts custom TLD list name.
Can be useful if you need a fresh TLD list or have a custom one.
)",
Documentation::Examples{
{"cutToFirstSignificantSubdomainCustom", "SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');"},
.examples{
{"cutToFirstSignificantSubdomainCustom", "SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');", ""},
},
Documentation::Categories{"URL"}
.categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustomWithWWW>(
{
R"(
FunctionDocumentation{
.description=R"(
Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`.
Accepts custom TLD list name from config.
Can be useful if you need a fresh TLD list or have a custom one.
)",
Documentation::Examples{{"cutToFirstSignificantSubdomainCustomWithWWW", "SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')"}},
Documentation::Categories{"URL"}
.examples{{"cutToFirstSignificantSubdomainCustomWithWWW", "SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')", ""}},
.categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustomRFC>(
{
R"(Similar to `cutToFirstSignificantSubdomainCustom` but follows stricter rules according to RFC 3986.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Similar to `cutToFirstSignificantSubdomainCustom` but follows stricter rules according to RFC 3986.)",
.examples{},
.categories{"URL"}
});
factory.registerFunction<FunctionCutToFirstSignificantSubdomainCustomWithWWWRFC>(
{
R"(Similar to `cutToFirstSignificantSubdomainCustomWithWWW` but follows stricter rules according to RFC 3986.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Similar to `cutToFirstSignificantSubdomainCustomWithWWW` but follows stricter rules according to RFC 3986.)",
.examples{},
.categories{"URL"}
});
}

View File

@ -14,23 +14,23 @@ using FunctionDomainRFC = FunctionStringToString<ExtractSubstringImpl<ExtractDom
REGISTER_FUNCTION(Domain)
{
factory.registerFunction<FunctionDomain>(
factory.registerFunction<FunctionDomain>(FunctionDocumentation
{
R"(
.description=R"(
Extracts the hostname from a URL.
The URL can be specified with or without a scheme.
If the argument can't be parsed as a URL, the function returns an empty string.
)",
Documentation::Examples{{"domain", "SELECT domain('svn+ssh://some.svn-hosting.com:80/repo/trunk')"}},
Documentation::Categories{"URL"}
.examples{{"domain", "SELECT domain('svn+ssh://some.svn-hosting.com:80/repo/trunk')", ""}},
.categories{"URL"}
});
factory.registerFunction<FunctionDomainRFC>(
factory.registerFunction<FunctionDomainRFC>(FunctionDocumentation
{
R"(Similar to `domain` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
.description=R"(Similar to `domain` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
.examples{},
.categories{"URL"}
});
}

View File

@ -15,21 +15,21 @@ using FunctionDomainWithoutWWWRFC = FunctionStringToString<ExtractSubstringImpl<
REGISTER_FUNCTION(DomainWithoutWWW)
{
factory.registerFunction<FunctionDomainWithoutWWW>(
{
R"(
FunctionDocumentation{
.description=R"(
Extracts the hostname from a URL, removing the leading "www." if present.
The URL can be specified with or without a scheme.
If the argument can't be parsed as a URL, the function returns an empty string.
)",
Documentation::Examples{{"domainWithoutWWW", "SELECT domainWithoutWWW('https://www.clickhouse.com')"}},
Documentation::Categories{"URL"}
.examples{{"domainWithoutWWW", "SELECT domainWithoutWWW('https://www.clickhouse.com')", ""}},
.categories{"URL"}
});
factory.registerFunction<FunctionDomainWithoutWWWRFC>(
{
R"(Similar to `domainWithoutWWW` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Similar to `domainWithoutWWW` but follows stricter rules to be compatible with RFC 3986 and less performant.)",
.examples{},
.categories{"URL"}
});
}

View File

@ -15,8 +15,8 @@ using FunctionFirstSignificantSubdomainRFC = FunctionStringToString<ExtractSubst
REGISTER_FUNCTION(FirstSignificantSubdomain)
{
factory.registerFunction<FunctionFirstSignificantSubdomain>(
{
R"(
FunctionDocumentation{
.description=R"(
Returns the "first significant subdomain".
The first significant subdomain is a second-level domain if it is 'com', 'net', 'org', or 'co'.
@ -26,15 +26,15 @@ For example, firstSignificantSubdomain('https://news.clickhouse.com/') = 'clickhouse'.
The list of "insignificant" second-level domains and other implementation details may change in the future.
)",
Documentation::Examples{{"firstSignificantSubdomain", "SELECT firstSignificantSubdomain('https://news.clickhouse.com/')"}},
Documentation::Categories{"URL"}
.examples{{"firstSignificantSubdomain", "SELECT firstSignificantSubdomain('https://news.clickhouse.com/')", ""}},
.categories{"URL"}
});
factory.registerFunction<FunctionFirstSignificantSubdomainRFC>(
{
R"(Returns the "first significant subdomain" according to RFC 1034.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
FunctionDocumentation{
.description=R"(Returns the "first significant subdomain" according to RFC 1034.)",
.examples{},
.categories{"URL"}
});
}

View File

@ -138,17 +138,15 @@ struct FunctionPortRFC : public FunctionPortImpl<true>
REGISTER_FUNCTION(Port)
{
factory.registerFunction<FunctionPort>(
factory.registerFunction<FunctionPort>(FunctionDocumentation
{
R"(Returns the port or `default_port` if there is no port in the URL (or in case of validation error).)",
Documentation::Examples{},
Documentation::Categories{"URL"}
.description=R"(Returns the port or `default_port` if there is no port in the URL (or in case of validation error).)",
.categories{"URL"}
});
factory.registerFunction<FunctionPortRFC>(
factory.registerFunction<FunctionPortRFC>(FunctionDocumentation
{
R"(Similar to `port`, but conforms to RFC 3986.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
.description=R"(Similar to `port`, but conforms to RFC 3986.)",
.categories{"URL"}
});
}

View File

@ -53,22 +53,22 @@ using FunctionTopLevelDomainRFC = FunctionStringToString<ExtractSubstringImpl<Ex
REGISTER_FUNCTION(TopLevelDomain)
{
factory.registerFunction<FunctionTopLevelDomain>(
factory.registerFunction<FunctionTopLevelDomain>(FunctionDocumentation
{
R"(
.description=R"(
Extracts the top-level domain from a URL.
Returns an empty string if the argument cannot be parsed as a URL or does not contain a top-level domain.
)",
Documentation::Examples{{"topLevelDomain", "SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk')"}},
Documentation::Categories{"URL"}
.examples{{"topLevelDomain", "SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk')", ""}},
.categories{"URL"}
});
factory.registerFunction<FunctionTopLevelDomainRFC>(
factory.registerFunction<FunctionTopLevelDomainRFC>(FunctionDocumentation
{
R"(Similar to topLevelDomain, but conforms to RFC 3986.)",
Documentation::Examples{},
Documentation::Categories{"URL"}
.description=R"(Similar to topLevelDomain, but conforms to RFC 3986.)",
.examples{},
.categories{"URL"}
});
}

View File

@ -107,17 +107,17 @@ public:
/// UTC_timestamp for MySQL interface support
REGISTER_FUNCTION(UTCTimestamp)
{
factory.registerFunction<UTCTimestampOverloadResolver>({
R"(
factory.registerFunction<UTCTimestampOverloadResolver>(FunctionDocumentation{
.description=R"(
Returns the current date and time at the moment of query analysis. The function is a constant expression.
Same as `now('UTC')`. It was added only for MySQL support; `now` is preferred.
Example:
[example:typical]
)",
Documentation::Examples{
{"typical", "SELECT UTCTimestamp();"}},
Documentation::Categories{"Dates and Times"}}, FunctionFactory::CaseInsensitive);
.examples{
{"typical", "SELECT UTCTimestamp();", ""}},
.categories{"Dates and Times"}}, FunctionFactory::CaseInsensitive);
factory.registerAlias("UTC_timestamp", UTCTimestampOverloadResolver::name, FunctionFactory::CaseInsensitive);
}

View File

@ -10,8 +10,7 @@ namespace DB
REGISTER_FUNCTION(UniqTheta)
{
factory.registerFunction<FunctionUniqThetaIntersect>(
{
R"(
FunctionDocumentation{.description = R"(
Two uniqThetaSketch objects to do intersect calculation (set operation ∩), the result is a new uniqThetaSketch.
A uniqThetaSketch object is to be constructed by aggregation function uniqTheta with -State.
@ -22,14 +21,13 @@ For more information on RoaringBitmap, see: [Theta Sketch Framework](https://dat
Typical usage:
[example:typical]
)",
Documentation::Examples{
{"typical", "select finalizeAggregation(uniqThetaIntersect(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));"}},
Documentation::Categories{"uniqTheta"}
.examples{
{"typical", "select finalizeAggregation(uniqThetaIntersect(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));", ""}},
.categories{"uniqTheta"}
});
factory.registerFunction<FunctionUniqThetaUnion>(
{
R"(
FunctionDocumentation{.description = R"(
Two uniqThetaSketch objects to do union calculation (set operation ∪), the result is a new uniqThetaSketch.
A uniqThetaSketch object is to be constructed by aggregation function uniqTheta with -State.
@ -40,13 +38,12 @@ For more information on RoaringBitmap, see: [Theta Sketch Framework](https://dat
Typical usage:
[example:typical]
)",
Documentation::Examples{
{"typical", "select finalizeAggregation(uniqThetaUnion(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));"}},
Documentation::Categories{"uniqTheta"}
.examples{
{"typical", "select finalizeAggregation(uniqThetaUnion(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));", ""}},
.categories{"uniqTheta"}
});
factory.registerFunction<FunctionUniqThetaNot>(
{
R"(
FunctionDocumentation{.description = R"(
Two uniqThetaSketch objects to do a_not_b calculation (set operation ×), the result is a new uniqThetaSketch.
A uniqThetaSketch object is to be constructed by aggregation function uniqTheta with -State.
@ -57,9 +54,9 @@ For more information on RoaringBitmap, see: [Theta Sketch Framework](https://dat
Typical usage:
[example:typical]
)",
Documentation::Examples{
{"typical", "select finalizeAggregation(uniqThetaNot(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));"}},
Documentation::Categories{"uniqTheta"}
.examples{
{"typical", "select finalizeAggregation(uniqThetaNot(arrayReduce('uniqThetaState',[1,2]), arrayReduce('uniqThetaState',[2,3,4])));", ""}},
.categories{"uniqTheta"}
});
}

View File

@ -135,7 +135,7 @@ public:
size_t num_function_arguments = function_type->getArgumentTypes().size();
if (is_single_array_argument
&& tuple_argument_size
&& tuple_argument_size > 1
&& tuple_argument_size == num_function_arguments)
{
assert(nested_types.size() == 1);
@ -337,12 +337,13 @@ public:
}
const auto * column_tuple = checkAndGetColumn<ColumnTuple>(&column_array->getData());
if (is_single_array_argument && column_tuple && column_tuple->getColumns().size() == num_function_arguments)
size_t tuple_size = column_tuple ? column_tuple->getColumns().size() : 0;
if (is_single_array_argument && tuple_size > 1 && tuple_size == num_function_arguments)
{
const auto & type_tuple = assert_cast<const DataTypeTuple &>(*array_type->getNestedType());
const auto & tuple_names = type_tuple.getElementNames();
size_t tuple_size = column_tuple->getColumns().size();
arrays.reserve(column_tuple->getColumns().size());
for (size_t j = 0; j < tuple_size; ++j)
{

View File

@ -363,101 +363,101 @@ using FunctionMapPartialReverseSort = FunctionMapToArrayAdapter<FunctionArrayPar
REGISTER_FUNCTION(MapMiscellaneous)
{
factory.registerFunction<FunctionMapConcat>(
{
"The same as arrayConcat.",
Documentation::Examples{{"mapConcat", "SELECT mapConcat(map('k1', 'v1'), map('k2', 'v2'))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayConcat.",
.examples{{"mapConcat", "SELECT mapConcat(map('k1', 'v1'), map('k2', 'v2'))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapKeys>(
{
"Returns an array with the keys of map.",
Documentation::Examples{{"mapKeys", "SELECT mapKeys(map('k1', 'v1', 'k2', 'v2'))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="Returns an array with the keys of map.",
.examples{{"mapKeys", "SELECT mapKeys(map('k1', 'v1', 'k2', 'v2'))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapValues>(
{
"Returns an array with the values of map.",
Documentation::Examples{{"mapValues", "SELECT mapValues(map('k1', 'v1', 'k2', 'v2'))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="Returns an array with the values of map.",
.examples{{"mapValues", "SELECT mapValues(map('k1', 'v1', 'k2', 'v2'))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapContains>(
{
"Checks whether the map has the specified key.",
Documentation::Examples{{"mapContains", "SELECT mapContains(map('k1', 'v1', 'k2', 'v2'), 'k1')"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="Checks whether the map has the specified key.",
.examples{{"mapContains", "SELECT mapContains(map('k1', 'v1', 'k2', 'v2'), 'k1')", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapFilter>(
{
"The same as arrayFilter.",
Documentation::Examples{{"mapFilter", "SELECT mapFilter((k, v) -> v > 1, map('k1', 1, 'k2', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayFilter.",
.examples{{"mapFilter", "SELECT mapFilter((k, v) -> v > 1, map('k1', 1, 'k2', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapApply>(
{
"The same as arrayMap.",
Documentation::Examples{{"mapApply", "SELECT mapApply((k, v) -> (k, v * 2), map('k1', 1, 'k2', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayMap.",
.examples{{"mapApply", "SELECT mapApply((k, v) -> (k, v * 2), map('k1', 1, 'k2', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapExists>(
{
"The same as arrayExists.",
Documentation::Examples{{"mapExists", "SELECT mapExists((k, v) -> v = 1, map('k1', 1, 'k2', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayExists.",
.examples{{"mapExists", "SELECT mapExists((k, v) -> v = 1, map('k1', 1, 'k2', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapAll>(
{
"The same as arrayAll.",
Documentation::Examples{{"mapAll", "SELECT mapAll((k, v) -> v = 1, map('k1', 1, 'k2', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayAll.",
.examples{{"mapAll", "SELECT mapAll((k, v) -> v = 1, map('k1', 1, 'k2', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapSort>(
{
"The same as arraySort.",
Documentation::Examples{{"mapSort", "SELECT mapSort((k, v) -> v, map('k1', 3, 'k2', 1, 'k3', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arraySort.",
.examples{{"mapSort", "SELECT mapSort((k, v) -> v, map('k1', 3, 'k2', 1, 'k3', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapReverseSort>(
{
"The same as arrayReverseSort.",
Documentation::Examples{{"mapReverseSort", "SELECT mapReverseSort((k, v) -> v, map('k1', 3, 'k2', 1, 'k3', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayReverseSort.",
.examples{{"mapReverseSort", "SELECT mapReverseSort((k, v) -> v, map('k1', 3, 'k2', 1, 'k3', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapPartialSort>(
{
"The same as arrayReverseSort.",
Documentation::Examples{{"mapPartialSort", "SELECT mapPartialSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayReverseSort.",
.examples{{"mapPartialSort", "SELECT mapPartialSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapPartialReverseSort>(
{
"The same as arrayPartialReverseSort.",
Documentation::Examples{{"mapPartialReverseSort", "SELECT mapPartialReverseSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2))"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="The same as arrayPartialReverseSort.",
.examples{{"mapPartialReverseSort", "SELECT mapPartialReverseSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2))", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapContainsKeyLike>(
{
"Checks whether map contains key LIKE specified pattern.",
Documentation::Examples{{"mapContainsKeyLike", "SELECT mapContainsKeyLike(map('k1-1', 1, 'k2-1', 2), 'k1%')"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="Checks whether map contains key LIKE specified pattern.",
.examples{{"mapContainsKeyLike", "SELECT mapContainsKeyLike(map('k1-1', 1, 'k2-1', 2), 'k1%')", ""}},
.categories{"Map"},
});
factory.registerFunction<FunctionMapExtractKeyLike>(
{
"Returns a map with elements which key matches the specified pattern.",
Documentation::Examples{{"mapExtractKeyLike", "SELECT mapExtractKeyLike(map('k1-1', 1, 'k2-1', 2), 'k1%')"}},
Documentation::Categories{"Map"},
FunctionDocumentation{
.description="Returns a map with elements which key matches the specified pattern.",
.examples{{"mapExtractKeyLike", "SELECT mapExtractKeyLike(map('k1-1', 1, 'k2-1', 2), 'k1%')", ""}},
.categories{"Map"},
});
}

Some files were not shown because too many files have changed in this diff.