Merge branch 'master' into hanfei/gwp-asan

Han Fei 2023-01-13 20:26:34 +01:00
commit 7feb9b7f6f
304 changed files with 9515 additions and 2988 deletions


@ -19,6 +19,9 @@ tests/ci/run_check.py
...
### Documentation entry for user-facing changes
- [ ] Documentation is written (mandatory for new features)
<!---
Directly edit documentation source files in the "docs" folder with the same pull-request as code changes

3
.gitmodules vendored

@ -327,3 +327,6 @@
[submodule "contrib/aws-s2n-tls"]
path = contrib/aws-s2n-tls
url = https://github.com/ClickHouse/s2n-tls
[submodule "contrib/crc32-vpmsum"]
path = contrib/crc32-vpmsum
url = https://github.com/antonblanchard/crc32-vpmsum.git


@ -144,6 +144,13 @@
# define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability
# define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock
# define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function
# define TSA_CAPABILITY(...) __attribute__((capability(__VA_ARGS__))) /// object of a class can be used as capability
# define TSA_ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__))) /// function acquires a capability, but does not release it
# define TSA_TRY_ACQUIRE(...) __attribute__((try_acquire_capability(__VA_ARGS__))) /// function tries to acquire a capability and returns a boolean value indicating success or failure
# define TSA_RELEASE(...) __attribute__((release_capability(__VA_ARGS__))) /// function releases the given capability
# define TSA_ACQUIRE_SHARED(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) /// function acquires a shared capability, but does not release it
# define TSA_TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) /// function tries to acquire a shared capability and returns a boolean value indicating success or failure
# define TSA_RELEASE_SHARED(...) __attribute__((release_shared_capability(__VA_ARGS__))) /// function releases the given shared capability
/// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing them for the whole function)
/// They use a lambda function to apply the function attribute to a single statement. This enables us to suppress warnings locally instead of
@ -164,6 +171,13 @@
# define TSA_REQUIRES(...)
# define TSA_REQUIRES_SHARED(...)
# define TSA_NO_THREAD_SAFETY_ANALYSIS
# define TSA_CAPABILITY(...)
# define TSA_ACQUIRE(...)
# define TSA_TRY_ACQUIRE(...)
# define TSA_RELEASE(...)
# define TSA_ACQUIRE_SHARED(...)
# define TSA_TRY_ACQUIRE_SHARED(...)
# define TSA_RELEASE_SHARED(...)
# define TSA_SUPPRESS_WARNING_FOR_READ(x) (x)
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) (x)
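Taken together, these annotations let clang's `-Wthread-safety` analysis check lock discipline at compile time. Below is a minimal, hedged usage sketch; the class and variable names are illustrative only, the `#include <base/defines.h>` path is an assumption about where these macros live, and `TSA_GUARDED_BY`/`TSA_REQUIRES` are defined in the same header outside the hunk shown above.

```cpp
#include <mutex>

#include <base/defines.h>  /// assumed include path for the TSA_* macros shown above

/// A mutex wrapper declared as a capability, so the analysis can track who holds it.
class TSA_CAPABILITY("CounterMutex") CounterMutex
{
public:
    void lock() TSA_ACQUIRE() { impl.lock(); }
    void unlock() TSA_RELEASE() { impl.unlock(); }
    bool try_lock() TSA_TRY_ACQUIRE(true) { return impl.try_lock(); }

private:
    std::mutex impl;
};

CounterMutex counter_mutex;
int counter TSA_GUARDED_BY(counter_mutex) = 0;

/// Callers must already hold counter_mutex; clang warns at call sites that do not.
void increment() TSA_REQUIRES(counter_mutex)
{
    ++counter;
}

int read_relaxed()
{
    /// Suppress the warning for this single read instead of disabling TSA for the whole function.
    return TSA_SUPPRESS_WARNING_FOR_READ(counter);
}
```

When the compiler does not support the analysis, the fallback branch above turns every macro into a no-op, so the same code still builds unchanged.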


@ -55,6 +55,7 @@ else ()
endif ()
add_contrib (miniselect-cmake miniselect)
add_contrib (pdqsort-cmake pdqsort)
add_contrib (crc32-vpmsum-cmake crc32-vpmsum)
add_contrib (sparsehash-c11-cmake sparsehash-c11)
add_contrib (abseil-cpp-cmake abseil-cpp)
add_contrib (magic-enum-cmake magic_enum)

2
contrib/azure vendored

@ -1 +1 @@
Subproject commit ef75afc075fc71fbcd8fe28dcda3794ae265fd1c
Subproject commit ea8c3044f43f5afa7016d2d580ed201f495d7e94

1
contrib/crc32-vpmsum vendored Submodule

@ -0,0 +1 @@
Subproject commit 452155439389311fc7d143621eaf56a258e02476


@ -0,0 +1,14 @@
# module crc32-vpmsum gets built along with the files vec_crc32.h and crc32_constants.h in crc32-vpmsum-cmake
# Please see README.md for information about how to generate crc32_constants.h
if (NOT ARCH_PPC64LE)
message (STATUS "crc32-vpmsum library is only supported on ppc64le")
return()
endif()
SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/crc32-vpmsum")
add_library(_crc32-vpmsum
"${LIBRARY_DIR}/vec_crc32.c"
)
target_include_directories(_crc32-vpmsum SYSTEM BEFORE PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
add_library(ch_contrib::crc32-vpmsum ALIAS _crc32-vpmsum)


@ -0,0 +1,9 @@
# To Generate crc32_constants.h
- Run the makefile in the `../crc32-vpmsum` directory with the following options and CRC polynomial. These options must use the same polynomial and order as the Intel intrinsic functions:
```bash
make crc32_constants.h CRC="0x11EDC6F41" OPTIONS="-x -r -c"
```
- Move the generated `crc32_constants.h` into this directory.
- For more background, see: https://masterchef2209.wordpress.com/2020/06/17/guide-to-intel-sse4-2-crc-intrinisics-implementation-for-simde/
- Intel intrinsics guide: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u64&ig_expand=1492,1493,1559
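For orientation only, here is a hedged bit-by-bit CRC-32C reference in C++ (not part of this repository). It uses the reflected form `0x82F63B78` of the Castagnoli polynomial `0x1EDC6F41` that the `make` invocation above targets; once initial-value and final-inversion conventions are aligned, the vectorized `crc32_vpmsum` output should match it.

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Reflected form of the Castagnoli polynomial (the "0x11EDC6F41" passed to make above).
static constexpr uint32_t CRC32C_POLY_REFLECTED = 0x82F63B78;

// Plain bit-by-bit CRC-32C; slow, but handy for cross-checking a vectorized implementation.
static uint32_t crc32c_bitwise(uint32_t crc, const unsigned char * data, size_t len)
{
    crc = ~crc;
    for (size_t i = 0; i < len; ++i)
    {
        crc ^= data[i];
        for (int bit = 0; bit < 8; ++bit)
            crc = (crc >> 1) ^ (CRC32C_POLY_REFLECTED & (0U - (crc & 1U)));
    }
    return ~crc;
}

int main()
{
    // The canonical CRC-32C check value for "123456789" is 0xE3069283.
    const char * s = "123456789";
    printf("0x%08X\n", crc32c_bitwise(0, reinterpret_cast<const unsigned char *>(s), strlen(s)));
    return 0;
}
```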

File diff suppressed because it is too large.


@ -0,0 +1,26 @@
#ifndef VEC_CRC32
#define VEC_CRC32
#if ! ((defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
# error PowerPC architecture is expected
#endif
#ifdef __cplusplus
extern "C" {
#endif
unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
static inline uint32_t crc32_ppc(uint64_t crc, unsigned char const *buffer, size_t len)
{
    assert(buffer);
    crc = crc32_vpmsum(crc, buffer, (unsigned long)len);
    return crc;
}
#ifdef __cplusplus
}
#endif
#endif
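A hedged usage sketch for this header follows; it is only meaningful on a ppc64le build where the `_crc32-vpmsum` library (alias `ch_contrib::crc32-vpmsum`) is linked in. The `~0U` initial value and the final inversion are an assumed CRC-32C caller convention, not something the header enforces, and the extra standard includes cover types that `vec_crc32.h` uses but does not include itself.

```cpp
#include <cassert>   // vec_crc32.h calls assert() but does not include <assert.h> itself
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

#include "vec_crc32.h"

int main()
{
    const char * data = "hello, clickhouse";
    /// Start from all ones and invert the result, as CRC-32C callers conventionally do.
    uint32_t crc = crc32_ppc(~0U, reinterpret_cast<const unsigned char *>(data), strlen(data));
    crc = ~crc;
    printf("crc32c = 0x%08x\n", crc);
    return 0;
}
```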


@ -0,0 +1,11 @@
version: '2.3'
services:
    kerberoskdc:
        image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest}
        hostname: kerberoskdc
        volumes:
            - ${KERBEROS_KDC_DIR}/secrets:/tmp/keytab
            - ${KERBEROS_KDC_DIR}/../kerberos_image_config.sh:/config.sh
            - /dev/urandom:/dev/random
        ports: [88, 749]


@ -5,12 +5,18 @@ FROM ubuntu:22.04
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git default-jdk maven python3 --yes --no-install-recommends
RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip
RUN apt-get update --yes && \
env DEBIAN_FRONTEND=noninteractive apt-get install wget git default-jdk maven python3 --yes --no-install-recommends && \
apt-get clean
# We need to get the repository's HEAD each time, so we invalidate the layers' cache
ARG CACHE_INVALIDATOR=0
RUN mkdir /sqlancer && \
cd /sqlancer && \
unzip /sqlancer.zip
RUN cd /sqlancer/sqlancer-master && mvn package -DskipTests
wget -q -O- https://github.com/sqlancer/sqlancer/archive/master.tar.gz | \
tar zx -C /sqlancer && \
cd /sqlancer/sqlancer-master && \
mvn package -DskipTests && \
rm -r /root/.m2
COPY run.sh /
COPY process_sqlancer_result.py /


@ -128,18 +128,12 @@ EOL
function stop()
{
local max_tries="${1:-90}"
local pid
# Preserve the pid, since the server can hang after the PID file is deleted.
pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
clickhouse stop $max_tries --do-not-kill && return
if [ -n "$1" ]
then
# temporarily disable it in BC check
clickhouse stop --force
return
fi
clickhouse stop --max-tries "$max_tries" --do-not-kill && return
# We failed to stop the server with SIGTERM. Maybe it hung, let's collect stacktraces.
kill -TERM "$(pidof gdb)" ||:
@ -465,7 +459,8 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
clickhouse stop --force
)
stop 1
# Use a bigger timeout for the previous version
stop 300
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log
# Start new server
@ -598,7 +593,7 @@ clickhouse-local --structure "test String, res String" -q "SELECT 'failure', tes
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# Core dumps
find . -type f -name 'core.*' | while read core; do
find . -type f -maxdepth 1 -name 'core.*' | while read core; do
zstd --threads=0 $core
mv $core.zst /test_output/
done


@ -6,6 +6,8 @@ import argparse
import csv
# TODO: add typing and log files to the fourth column, think about launching
# everything from the python and not bash
def process_result(result_folder):
    status = "success"
    description = ""


@ -7,186 +7,27 @@ sidebar_label: 2023
### ClickHouse release v22.3.16.1190-lts (bb4e0934e5a) FIXME as compared to v22.10.1.1877-stable (98ab5a3c189)
#### Backward Incompatible Change
* JSONExtract family of functions will now attempt to coerce to the requested type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)).
* Backported in [#43484](https://github.com/ClickHouse/ClickHouse/issues/43484): Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)).
#### New Feature
* Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)).
* Add Hudi and DeltaLake table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do)).
* Add 4LW command `csnp` for manually creating snapshots. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)).
* Add function ascii like in spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)).
* Published function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)).
#### Performance Improvement
* Currently, the only saturable operators are And and Or, and their code paths are affected by this change. [#42214](https://github.com/ClickHouse/ClickHouse/pull/42214) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
* `match` function can use the index if it's a condition on string prefix. This closes [#37333](https://github.com/ClickHouse/ClickHouse/issues/37333). [#42458](https://github.com/ClickHouse/ClickHouse/pull/42458) ([clarkcaoliu](https://github.com/Clark0)).
* Support parallel parsing for LineAsString input format. This improves performance just slightly. This closes [#42502](https://github.com/ClickHouse/ClickHouse/issues/42502). [#42780](https://github.com/ClickHouse/ClickHouse/pull/42780) ([Kruglov Pavel](https://github.com/Avogar)).
* Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)).
#### Improvement
* Support type `Object` inside other types, e.g. `Array(JSON)`. [#36969](https://github.com/ClickHouse/ClickHouse/pull/36969) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#42527](https://github.com/ClickHouse/ClickHouse/issues/42527): Fix issue with passing MySQL timeouts for MySQL database engine and MySQL table function. Closes [#34168](https://github.com/ClickHouse/ClickHouse/issues/34168). [#40751](https://github.com/ClickHouse/ClickHouse/pull/40751) ([Kseniia Sumarokova](https://github.com/kssenii)).
* ClickHouse Client and ClickHouse Local will show progress by default even in non-interactive mode. If `/dev/tty` is available, the progress will be rendered directly to the terminal, without writing to stderr. It allows to get progress even if stderr is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* 1. Add, subtract and negate operations are now available on Intervals. In case when the types of Intervals are different they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)).
* Add `notLike` to key condition atom map, so a condition like `NOT LIKE 'prefix%'` can use the primary index. [#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)).
* Add support for FixedString input to base64 coding functions. [#42285](https://github.com/ClickHouse/ClickHouse/pull/42285) ([ltrk2](https://github.com/ltrk2)).
* Add columns `bytes_on_disk` and `path` to `system.detached_parts`. Closes [#42264](https://github.com/ClickHouse/ClickHouse/issues/42264). [#42303](https://github.com/ClickHouse/ClickHouse/pull/42303) ([chen](https://github.com/xiedeyantu)).
* Added `**` glob support for recursive directory traversal to filesystem and S3. Resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Mask passwords and secret keys both in `system.query_log` and `/var/log/clickhouse-server/*.log` and also in error messages. [#42484](https://github.com/ClickHouse/ClickHouse/pull/42484) ([Vitaly Baranov](https://github.com/vitlibar)).
* Add a new variable call `limit` in query_info, indicating whether this query is a limit-trivial query. If so, we will adjust the approximate total rows for later estimation. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)).
* Implement `ATTACH` of `MergeTree` table for `s3_plain` disk (plus some fixes for `s3_plain`). [#42628](https://github.com/ClickHouse/ClickHouse/pull/42628) ([Azat Khuzhin](https://github.com/azat)).
* Fix no progress indication on INSERT FROM INFILE. Closes [#42548](https://github.com/ClickHouse/ClickHouse/issues/42548). [#42634](https://github.com/ClickHouse/ClickHouse/pull/42634) ([chen](https://github.com/xiedeyantu)).
* Add `min_age_to_force_merge_on_partition_only` setting to optimize old parts for the entire partition only. [#42659](https://github.com/ClickHouse/ClickHouse/pull/42659) ([Antonio Andelic](https://github.com/antonio2368)).
* Throttling algorithm changed to token bucket. [#42665](https://github.com/ClickHouse/ClickHouse/pull/42665) ([Sergei Trifonov](https://github.com/serxa)).
* Added new field allow_readonly in system.table_functions to allow using table functions in readonly mode resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. [#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Allow to use Date32 arguments for formatDateTime and FROM_UNIXTIME functions. [#42737](https://github.com/ClickHouse/ClickHouse/pull/42737) ([Roman Vasin](https://github.com/rvasin)).
* Backported in [#42839](https://github.com/ClickHouse/ClickHouse/issues/42839): Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add `FailedAsyncInsertQuery` event metric for async inserts. [#42814](https://github.com/ClickHouse/ClickHouse/pull/42814) ([Krzysztof Góralski](https://github.com/kgoralski)).
* Increase the size of upload part exponentially for backup to S3. [#42833](https://github.com/ClickHouse/ClickHouse/pull/42833) ([Vitaly Baranov](https://github.com/vitlibar)).
#### Bug Fix
* Backported in [#43829](https://github.com/ClickHouse/ClickHouse/issues/43829): Updated normaliser to clone the alias ast. resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
#### Build/Testing/Packaging Improvement
* Run SQLancer for each pull request and commit to master. [SQLancer](https://github.com/sqlancer/sqlancer) is an OpenSource fuzzer that focuses on automatic detection of logical bugs. [#42397](https://github.com/ClickHouse/ClickHouse/pull/42397) ([Ilya Yatsishin](https://github.com/qoega)).
* Update to latest zlib-ng. [#42463](https://github.com/ClickHouse/ClickHouse/pull/42463) ([Boris Kuschel](https://github.com/bkuschel)).
* Use llvm `ld64.lld` on macOS to suppress ld warnings, close [#42282](https://github.com/ClickHouse/ClickHouse/issues/42282). [#42470](https://github.com/ClickHouse/ClickHouse/pull/42470) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)).
* Improve bugfix validation check: fix bug with skipping the check, port separate status in CI, run after check labels and style check. Close [#40349](https://github.com/ClickHouse/ClickHouse/issues/40349). [#42702](https://github.com/ClickHouse/ClickHouse/pull/42702) ([Vladimir C](https://github.com/vdimir)).
* Backported in [#43050](https://github.com/ClickHouse/ClickHouse/issues/43050): Wait for all files are in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Wait for all files are in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Use https://github.com/matus-chochlik/ctcache for clang-tidy results caching. [#42913](https://github.com/ClickHouse/ClickHouse/pull/42913) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#42963](https://github.com/ClickHouse/ClickHouse/issues/42963): Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#43039](https://github.com/ClickHouse/ClickHouse/issues/43039): Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#44109](https://github.com/ClickHouse/ClickHouse/issues/44109): Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#44431](https://github.com/ClickHouse/ClickHouse/issues/44431): Kill stress tests after 2.5h in case of hanging process. [#44214](https://github.com/ClickHouse/ClickHouse/pull/44214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#44557](https://github.com/ClickHouse/ClickHouse/issues/44557): Retry the integration tests on compressing errors. [#44529](https://github.com/ClickHouse/ClickHouse/pull/44529) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Fix schema inference in s3Cluster and improve in hdfsCluster. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix retries while reading from http table engines / table function (retriable errors could be retried more times than needed, non-retriable errors resulted in a failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)).
* A segmentation fault related to DNS & c-ares has been reported. The below error ocurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} <Fatal> BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} <Fatal> BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} <Fatal> BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} <Fatal> BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} <Fatal> BaseDaemon: 2. Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} <Fatal> BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} <Fatal> BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix `LOGICAL_ERROR` `Arguments of 'plus' have incorrect data types` which may happen in PK analysis (monotonicity check). Fix invalid PK analysis for monotonic binary functions with first constant argument. [#42410](https://github.com/ClickHouse/ClickHouse/pull/42410) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix incorrect key analysis when key types cannot be inside Nullable. This fixes [#42456](https://github.com/ClickHouse/ClickHouse/issues/42456). [#42469](https://github.com/ClickHouse/ClickHouse/pull/42469) ([Amos Bird](https://github.com/amosbird)).
* Fix typo in setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). [#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix create Set with wrong header when data type is LowCardinality. Closes [#42460](https://github.com/ClickHouse/ClickHouse/issues/42460). [#42579](https://github.com/ClickHouse/ClickHouse/pull/42579) ([flynn](https://github.com/ucasfl)).
* `(U)Int128` and `(U)Int256` values are correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix a bug in ParserFunction that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix truncate table does not hold lock correctly. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)).
* Fix possible SIGSEGV for web disks when file does not exists (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)).
* Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)).
* Fix stack-use-after-return under ASAN build in ParserCreateUserQuery. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix lowerUTF8()/upperUTF8() in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)).
* Additional bound check was added to lz4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)).
* Fix rare possible hung on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)).
* Fix incorrect saved_block_sample with multiple disjuncts in hash join, close [#42832](https://github.com/ClickHouse/ClickHouse/issues/42832). [#42876](https://github.com/ClickHouse/ClickHouse/pull/42876) ([Vladimir C](https://github.com/vdimir)).
* Fix a null pointer that could be generated when selecting `if` with an alias from a three-table join. [#42883](https://github.com/ClickHouse/ClickHouse/pull/42883) ([zzsmdfj](https://github.com/zzsmdfj)).
* Fix memory sanitizer report in ClusterDiscovery, close [#42763](https://github.com/ClickHouse/ClickHouse/issues/42763). [#42905](https://github.com/ClickHouse/ClickHouse/pull/42905) ([Vladimir C](https://github.com/vdimir)).
* Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)).
* Fix ATTACH TABLE in PostgreSQL database engine if the table contains DATETIME data type. Closes [#42817](https://github.com/ClickHouse/ClickHouse/issues/42817). [#42960](https://github.com/ClickHouse/ClickHouse/pull/42960) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix lambda parsing. Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#43512](https://github.com/ClickHouse/ClickHouse/issues/43512): Fix several buffer over-reads. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#43750](https://github.com/ClickHouse/ClickHouse/issues/43750): An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#43427](https://github.com/ClickHouse/ClickHouse/issues/43427): Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#43616](https://github.com/ClickHouse/ClickHouse/issues/43616): Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#43720](https://github.com/ClickHouse/ClickHouse/issues/43720): Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#43885](https://github.com/ClickHouse/ClickHouse/issues/43885): Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)).
* Backported in [#44179](https://github.com/ClickHouse/ClickHouse/issues/44179): Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#44283](https://github.com/ClickHouse/ClickHouse/issues/44283): Prevent `ReadonlyReplica` metric from having negative values. [#44220](https://github.com/ClickHouse/ClickHouse/pull/44220) ([Antonio Andelic](https://github.com/antonio2368)).
#### Build Improvement
* Add support for format ipv6 on s390x. [#42412](https://github.com/ClickHouse/ClickHouse/pull/42412) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Sonar Cloud Workflow"'. [#42725](https://github.com/ClickHouse/ClickHouse/pull/42725) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Fix multipart upload for large S3 object, backport to 22.3'. [#44217](https://github.com/ClickHouse/ClickHouse/pull/44217) ([ianton-ru](https://github.com/ianton-ru)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Build with libcxx(abi) 15 [#42513](https://github.com/ClickHouse/ClickHouse/pull/42513) ([Robert Schulze](https://github.com/rschu1ze)).
* Sonar Cloud Workflow [#42534](https://github.com/ClickHouse/ClickHouse/pull/42534) ([Julio Jimenez](https://github.com/juliojimenez)).
* Invalid type in where for Merge table (logical error) [#42576](https://github.com/ClickHouse/ClickHouse/pull/42576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix frequent memory drift message and clarify things in comments [#42582](https://github.com/ClickHouse/ClickHouse/pull/42582) ([Azat Khuzhin](https://github.com/azat)).
* Try to save `IDataPartStorage` interface [#42618](https://github.com/ClickHouse/ClickHouse/pull/42618) ([Anton Popov](https://github.com/CurtizJ)).
* Analyzer change setting into allow_experimental_analyzer [#42649](https://github.com/ClickHouse/ClickHouse/pull/42649) ([Maksim Kita](https://github.com/kitaisreal)).
* Analyzer IQueryTreeNode remove getName method [#42651](https://github.com/ClickHouse/ClickHouse/pull/42651) ([Maksim Kita](https://github.com/kitaisreal)).
* Minor fix iotest_nonblock build [#42658](https://github.com/ClickHouse/ClickHouse/pull/42658) ([Jordi Villar](https://github.com/jrdi)).
* Add tests and doc for some url-related functions [#42664](https://github.com/ClickHouse/ClickHouse/pull/42664) ([Vladimir C](https://github.com/vdimir)).
* Update version_date.tsv and changelogs after v22.10.1.1875-stable [#42676](https://github.com/ClickHouse/ClickHouse/pull/42676) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix error handling in clickhouse_helper.py [#42678](https://github.com/ClickHouse/ClickHouse/pull/42678) ([Ilya Yatsishin](https://github.com/qoega)).
* Fix execution of version_helper.py to use git tweaks [#42679](https://github.com/ClickHouse/ClickHouse/pull/42679) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* MergeTree indexes use RPNBuilderTree [#42681](https://github.com/ClickHouse/ClickHouse/pull/42681) ([Maksim Kita](https://github.com/kitaisreal)).
* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update version after release [#42699](https://github.com/ClickHouse/ClickHouse/pull/42699) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update version_date.tsv and changelogs after v22.10.1.1877-stable [#42700](https://github.com/ClickHouse/ClickHouse/pull/42700) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* OrderByLimitByDuplicateEliminationPass improve performance [#42704](https://github.com/ClickHouse/ClickHouse/pull/42704) ([Maksim Kita](https://github.com/kitaisreal)).
* Analyzer improve subqueries representation [#42705](https://github.com/ClickHouse/ClickHouse/pull/42705) ([Maksim Kita](https://github.com/kitaisreal)).
* Update version_date.tsv and changelogs after v22.9.4.32-stable [#42712](https://github.com/ClickHouse/ClickHouse/pull/42712) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v22.8.7.34-lts [#42713](https://github.com/ClickHouse/ClickHouse/pull/42713) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v22.7.7.24-stable [#42714](https://github.com/ClickHouse/ClickHouse/pull/42714) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Move SonarCloud Job to nightly [#42718](https://github.com/ClickHouse/ClickHouse/pull/42718) ([Julio Jimenez](https://github.com/juliojimenez)).
* Update version_date.tsv and changelogs after v22.8.8.3-lts [#42738](https://github.com/ClickHouse/ClickHouse/pull/42738) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Minor fix implicit cast CaresPTRResolver [#42747](https://github.com/ClickHouse/ClickHouse/pull/42747) ([Jordi Villar](https://github.com/jrdi)).
* Fix build on master [#42752](https://github.com/ClickHouse/ClickHouse/pull/42752) ([Igor Nikonov](https://github.com/devcrafter)).
* Update version_date.tsv and changelogs after v22.3.14.18-lts [#42759](https://github.com/ClickHouse/ClickHouse/pull/42759) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix anchor links [#42760](https://github.com/ClickHouse/ClickHouse/pull/42760) ([Sergei Trifonov](https://github.com/serxa)).
* Update version_date.tsv and changelogs after v22.3.14.23-lts [#42764](https://github.com/ClickHouse/ClickHouse/pull/42764) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update README.md [#42783](https://github.com/ClickHouse/ClickHouse/pull/42783) ([Yuko Takagi](https://github.com/yukotakagi)).
* Slightly better code with projections [#42794](https://github.com/ClickHouse/ClickHouse/pull/42794) ([Anton Popov](https://github.com/CurtizJ)).
* Fix some races in MergeTree [#42805](https://github.com/ClickHouse/ClickHouse/pull/42805) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix typo in comments [#42809](https://github.com/ClickHouse/ClickHouse/pull/42809) ([Gabriel](https://github.com/Gabriel39)).
* Fix compilation of LLVM with cmake cache [#42816](https://github.com/ClickHouse/ClickHouse/pull/42816) ([Azat Khuzhin](https://github.com/azat)).
* Fix link in docs [#42821](https://github.com/ClickHouse/ClickHouse/pull/42821) ([Sergei Trifonov](https://github.com/serxa)).
* Link to proper place in docs [#42822](https://github.com/ClickHouse/ClickHouse/pull/42822) ([Sergei Trifonov](https://github.com/serxa)).
* Fix argument type check in AggregateFunctionAnalysisOfVariance [#42823](https://github.com/ClickHouse/ClickHouse/pull/42823) ([Vladimir C](https://github.com/vdimir)).
* Tests/lambda analyzer [#42824](https://github.com/ClickHouse/ClickHouse/pull/42824) ([Denny Crane](https://github.com/den-crane)).
* Fix Missing Quotes - Sonar Nightly [#42831](https://github.com/ClickHouse/ClickHouse/pull/42831) ([Julio Jimenez](https://github.com/juliojimenez)).
* Add exclusions from the Snyk scan [#42834](https://github.com/ClickHouse/ClickHouse/pull/42834) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix Missing Env Vars - Sonar Nightly [#42843](https://github.com/ClickHouse/ClickHouse/pull/42843) ([Julio Jimenez](https://github.com/juliojimenez)).
* Fix typo [#42855](https://github.com/ClickHouse/ClickHouse/pull/42855) ([GoGoWen](https://github.com/GoGoWen)).
* Add timezone to 02458_datediff_date32 [#42857](https://github.com/ClickHouse/ClickHouse/pull/42857) ([Vladimir C](https://github.com/vdimir)).
* Adjust cancel and rerun workflow names to the actual [#42862](https://github.com/ClickHouse/ClickHouse/pull/42862) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Analyzer subquery in JOIN TREE with aggregation [#42865](https://github.com/ClickHouse/ClickHouse/pull/42865) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix getauxval for sanitizer builds [#42866](https://github.com/ClickHouse/ClickHouse/pull/42866) ([Amos Bird](https://github.com/amosbird)).
* Update version_date.tsv and changelogs after v22.10.2.11-stable [#42871](https://github.com/ClickHouse/ClickHouse/pull/42871) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Validate Query Tree in debug [#42879](https://github.com/ClickHouse/ClickHouse/pull/42879) ([Dmitry Novik](https://github.com/novikd)).
* changed type name for s3 plain storage [#42890](https://github.com/ClickHouse/ClickHouse/pull/42890) ([Aleksandr](https://github.com/AVMusorin)).
* Cleanup implementation of regexpReplace(All|One) [#42907](https://github.com/ClickHouse/ClickHouse/pull/42907) ([Robert Schulze](https://github.com/rschu1ze)).
* Do not show status for Bugfix validate check in non bugfix PRs [#42932](https://github.com/ClickHouse/ClickHouse/pull/42932) ([Vladimir C](https://github.com/vdimir)).
* fix(typo): Passible -> Possible [#42933](https://github.com/ClickHouse/ClickHouse/pull/42933) ([Yakko Majuri](https://github.com/yakkomajuri)).
* Pin the cryptography version to not break lambdas [#42934](https://github.com/ClickHouse/ClickHouse/pull/42934) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix: bad cast from type DB::ColumnLowCardinality to DB::ColumnString [#42937](https://github.com/ClickHouse/ClickHouse/pull/42937) ([Igor Nikonov](https://github.com/devcrafter)).
* Attach thread pool for loading parts to the query [#42947](https://github.com/ClickHouse/ClickHouse/pull/42947) ([Azat Khuzhin](https://github.com/azat)).
* Fix macOS M1 builds due to sprintf deprecation [#42962](https://github.com/ClickHouse/ClickHouse/pull/42962) ([Jordi Villar](https://github.com/jrdi)).
* Less use of CH-specific bit_cast() [#42968](https://github.com/ClickHouse/ClickHouse/pull/42968) ([Robert Schulze](https://github.com/rschu1ze)).
* Remove some utils [#42972](https://github.com/ClickHouse/ClickHouse/pull/42972) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix a bug in CAST function parser [#42980](https://github.com/ClickHouse/ClickHouse/pull/42980) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix old bug to remove `refs/head` from ref name [#42981](https://github.com/ClickHouse/ClickHouse/pull/42981) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add debug information to nightly builds [#42997](https://github.com/ClickHouse/ClickHouse/pull/42997) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add `on: workflow_call` to debug CI [#43000](https://github.com/ClickHouse/ClickHouse/pull/43000) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Simple fixes for restart replica description [#43004](https://github.com/ClickHouse/ClickHouse/pull/43004) ([Igor Nikonov](https://github.com/devcrafter)).
* Cleanup match code [#43006](https://github.com/ClickHouse/ClickHouse/pull/43006) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix TSan errors (correctly ignore _exit interception) [#43009](https://github.com/ClickHouse/ClickHouse/pull/43009) ([Azat Khuzhin](https://github.com/azat)).
* fix bandwidth throttlers initialization order [#43015](https://github.com/ClickHouse/ClickHouse/pull/43015) ([Sergei Trifonov](https://github.com/serxa)).
* Add test for issue [#42520](https://github.com/ClickHouse/ClickHouse/issues/42520) [#43027](https://github.com/ClickHouse/ClickHouse/pull/43027) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix msan warning [#43065](https://github.com/ClickHouse/ClickHouse/pull/43065) ([Raúl Marín](https://github.com/Algunenano)).
* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Yet another fix for AggregateFunctionMinMaxAny [#43778](https://github.com/ClickHouse/ClickHouse/pull/43778) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Assign release PRs [#44055](https://github.com/ClickHouse/ClickHouse/pull/44055) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix another bug in AggregateFunctionMinMaxAny [#44091](https://github.com/ClickHouse/ClickHouse/pull/44091) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Bump libdivide (to gain some new optimizations) [#44132](https://github.com/ClickHouse/ClickHouse/pull/44132) ([Azat Khuzhin](https://github.com/azat)).
* Add check for submodules sanity [#44386](https://github.com/ClickHouse/ClickHouse/pull/44386) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Implement a custom central checkout action [#44399](https://github.com/ClickHouse/ClickHouse/pull/44399) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).


@ -0,0 +1,16 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v22.3.17.13-lts (fcc4de7e805) FIXME as compared to v22.3.16.1190-lts (bb4e0934e5a)
#### Improvement
* Backported in [#45138](https://github.com/ClickHouse/ClickHouse/issues/45138): Allow to use String type instead of Binary in Arrow/Parquet/ORC formats. This PR introduces 3 new settings for it: `output_format_arrow_string_as_string`, `output_format_parquet_string_as_string`, `output_format_orc_string_as_string`. Default value for all settings is `false`. [#37327](https://github.com/ClickHouse/ClickHouse/pull/37327) ([Kruglov Pavel](https://github.com/Avogar)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).


@ -0,0 +1,33 @@
---
slug: /en/engines/table-engines/integrations/deltalake
sidebar_label: DeltaLake
---
# DeltaLake Table Engine
This engine provides a read-only integration with existing [Delta Lake](https://github.com/delta-io/delta) tables in Amazon S3.
## Create Table
Note that the Delta Lake table must already exist in S3; this command does not take DDL parameters to create a new table.
``` sql
CREATE TABLE deltalake
ENGINE = DeltaLake(url, [aws_access_key_id, aws_secret_access_key,])
```
**Engine parameters**
- `url` — Bucket URL with the path to the existing Delta Lake table.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are taken from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
**Example**
```sql
CREATE TABLE deltalake ENGINE=DeltaLake('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/', 'ABC123', 'Abc+123')
```
## See also
- [deltaLake table function](../../../sql-reference/table-functions/deltalake.md)


@ -0,0 +1,33 @@
---
slug: /en/engines/table-engines/integrations/hudi
sidebar_label: Hudi
---
# Hudi Table Engine
This engine provides a read-only integration with existing Apache [Hudi](https://hudi.apache.org/) tables in Amazon S3.
## Create Table
Note that the Hudi table must already exist in S3; this command does not take DDL parameters to create a new table.
``` sql
CREATE TABLE hudi_table
ENGINE = Hudi(url, [aws_access_key_id, aws_secret_access_key,])
```
**Engine parameters**
- `url` — Bucket URL with the path to an existing Hudi table.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are taken from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
**Example**
```sql
CREATE TABLE hudi_table ENGINE=Hudi('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/', 'ABC123', 'Abc+123')
```
## See also
- [hudi table function](/docs/en/sql-reference/table-functions/hudi.md)


@ -101,7 +101,7 @@ The `TabSeparated` format supports outputting total values (when using WITH TOTA
SELECT EventDate, count() AS c FROM test.hits GROUP BY EventDate WITH TOTALS ORDER BY EventDate FORMAT TabSeparated
```
``` text
``` response
2014-03-17 1406958
2014-03-18 1383658
2014-03-19 1405797
@ -177,7 +177,7 @@ INSERT INTO nestedt Values ( 1, [1], ['a'])
SELECT * FROM nestedt FORMAT TSV
```
``` text
``` response
1 [1] ['a']
```
@ -761,7 +761,7 @@ SELECT * FROM json_as_string;
Result:
``` text
``` response
┌─json──────────────────────────────┐
│ {"foo":{"bar":{"x":"y"},"baz":1}} │
│ {} │
@ -782,7 +782,7 @@ SELECT * FROM json_square_brackets;
Result:
```text
```response
┌─field──────────────────────┐
│ {"id": 1, "name": "name1"} │
│ {"id": 2, "name": "name2"} │
@ -1118,7 +1118,7 @@ When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHo
Consider the `UserActivity` table as an example:
``` text
``` response
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ -1 │
│ 4324182021466249494 │ 6 │ 185 │ 1 │
@ -1127,7 +1127,7 @@ Consider the `UserActivity` table as an example:
The query `SELECT * FROM UserActivity FORMAT JSONEachRow` returns:
``` text
``` response
{"UserID":"4324182021466249494","PageViews":5,"Duration":146,"Sign":-1}
{"UserID":"4324182021466249494","PageViews":6,"Duration":185,"Sign":1}
```
@ -1171,7 +1171,7 @@ Without this setting, ClickHouse throws an exception.
SELECT name, value FROM system.settings WHERE name = 'input_format_import_nested_json'
```
``` text
``` response
┌─name────────────────────────────┬─value─┐
│ input_format_import_nested_json │ 0 │
└─────────────────────────────────┴───────┘
@ -1181,7 +1181,7 @@ SELECT name, value FROM system.settings WHERE name = 'input_format_import_nested
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"], "i": [1, 23]}}
```
``` text
``` response
Code: 117. DB::Exception: Unknown field found while parsing JSONEachRow format: n: (at row 1)
```
@ -1191,7 +1191,7 @@ INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"],
SELECT * FROM json_each_row_nested
```
``` text
``` response
┌─n.s───────────┬─n.i────┐
│ ['abc','def'] │ [1,23] │
└───────────────┴────────┘
@ -1265,7 +1265,7 @@ For input it uses the following correspondence between BSON types and ClickHouse
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md) |
| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8).
Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8).
Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from a BSON Binary value with the `\x00` binary subtype. In this case the format validates that the size of the binary data equals the size of the expected value.
Note: this format doesn't work properly on Big-Endian platforms.
@ -1300,7 +1300,7 @@ Example (shown for the [PrettyCompact](#prettycompact) format):
SELECT * FROM t_null
```
``` text
``` response
┌─x─┬────y─┐
│ 1 │ ᴺᵁᴸᴸ │
└───┴──────┘
@ -1312,7 +1312,7 @@ Rows are not escaped in Pretty\* formats. Example is shown for the [PrettyCompac
SELECT 'String with \'quotes\' and \t character' AS Escaping_test
```
``` text
``` response
┌─Escaping_test────────────────────────┐
│ String with 'quotes' and character │
└──────────────────────────────────────┘
@ -1327,7 +1327,7 @@ The Pretty format supports outputting total values (when using WITH TOTALS) and
SELECT EventDate, count() AS c FROM test.hits GROUP BY EventDate WITH TOTALS ORDER BY EventDate FORMAT PrettyCompact
```
``` text
``` response
┌──EventDate─┬───────c─┐
│ 2014-03-17 │ 1406958 │
│ 2014-03-18 │ 1383658 │
@ -1488,7 +1488,7 @@ Example:
SELECT * FROM t_null FORMAT Vertical
```
``` text
``` response
Row 1:
──────
x: 1
@ -1501,7 +1501,7 @@ Rows are not escaped in Vertical format:
SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT Vertical
```
``` text
``` response
Row 1:
──────
test: string with 'quotes' and with some special
@ -2319,25 +2319,22 @@ INSERT INTO `test2` VALUES (1),(2),(3);
Queries:
```sql
:) desc file(dump.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'
DESCRIBE TABLE file(dump.sql, MySQLDump)
SETTINGS input_format_mysql_dump_table_name = 'test2'
Query id: 25e66c89-e10a-42a8-9b42-1ee8bbbde5ef
DESCRIBE TABLE file(dump.sql, MySQLDump) SETTINGS input_format_mysql_dump_table_name = 'test2'
```
```text
┌─name─┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ x │ Nullable(Int32) │ │ │ │ │ │
└──────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
:) select * from file(dump.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'
```sql
SELECT *
FROM file(dump.sql, MySQLDump)
SETTINGS input_format_mysql_dump_table_name = 'test2'
```
Query id: 17d59664-ebce-4053-bb79-d46a516fb590
```text
┌─x─┐
│ 1 │
│ 2 │


@ -22,10 +22,12 @@ To enable Kerberos, one should include `kerberos` section in `config.xml`. This
- `principal` - canonical service principal name that will be acquired and used when accepting security contexts.
- This parameter is optional, if omitted, the default principal will be used.
- `realm` - a realm that will be used to restrict authentication to only those requests whose initiator's realm matches it.
- This parameter is optional, if omitted, no additional filtering by realm will be applied.
- `keytab` - path to service keytab file.
- This parameter is optional, if omitted, path to service keytab file must be set in `KRB5_KTNAME` environment variable.
Example (goes into `config.xml`):
```xml


@ -643,3 +643,106 @@ Default value: `0` (limit never applied).
``` xml
<min_marks_to_honor_max_concurrent_queries>10</min_marks_to_honor_max_concurrent_queries>
```
## ratio_of_defaults_for_sparse_serialization {#ratio_of_defaults_for_sparse_serialization}
Minimal ratio of the number of _default_ values to the number of _all_ values in a column. Setting this value causes the column to be stored using sparse serialization.
If a column is sparse (contains mostly zeros), ClickHouse can encode it in a sparse format and automatically optimize calculations - the data does not require full decompression during queries. To enable this sparse serialization, define the `ratio_of_defaults_for_sparse_serialization` setting to be less than 1.0. If the value is greater than or equal to 1.0 (the default), then the columns will always be written using the normal full serialization.
Possible values:
- Float between 0 and 1 to enable sparse serialization
- 1.0 (or greater) if you do not want to use sparse serialization
Default value: `1.0` (sparse serialization is disabled)
**Example**
Notice the `s` column in the following table is an empty string for 95% of the rows. In `my_regular_table` we do not use sparse serialization, and in `my_sparse_table` we set `ratio_of_defaults_for_sparse_serialization` to 0.95:
```sql
CREATE TABLE my_regular_table
(
`id` UInt64,
`s` String
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO my_regular_table
SELECT
number AS id,
number % 20 = 0 ? toString(number): '' AS s
FROM
numbers(10000000);
CREATE TABLE my_sparse_table
(
`id` UInt64,
`s` String
)
ENGINE = MergeTree
ORDER BY id
SETTINGS ratio_of_defaults_for_sparse_serialization = 0.95;
INSERT INTO my_sparse_table
SELECT
number,
number % 20 = 0 ? toString(number): ''
FROM
numbers(10000000);
```
Notice the `s` column in `my_sparse_table` uses less storage space on disk:
```sql
SELECT table, name, data_compressed_bytes, data_uncompressed_bytes FROM system.columns
WHERE table LIKE 'my_%_table';
```
```response
┌─table────────────┬─name─┬─data_compressed_bytes─┬─data_uncompressed_bytes─┐
│ my_regular_table │ id │ 37790741 │ 75488328 │
│ my_regular_table │ s │ 2451377 │ 12683106 │
│ my_sparse_table │ id │ 37790741 │ 75488328 │
│ my_sparse_table │ s │ 2283454 │ 9855751 │
└──────────────────┴──────┴───────────────────────┴─────────────────────────┘
```
You can verify if a column is using the sparse encoding by viewing the `serialization_kind` column of the `system.parts_columns` table:
```sql
SELECT column, serialization_kind FROM system.parts_columns
WHERE table LIKE 'my_sparse_table';
```
You can see which parts of `s` were stored using the sparse serialization:
```response
┌─column─┬─serialization_kind─┐
│ id │ Default │
│ s │ Default │
│ id │ Default │
│ s │ Default │
│ id │ Default │
│ s │ Sparse │
│ id │ Default │
│ s │ Sparse │
│ id │ Default │
│ s │ Sparse │
│ id │ Default │
│ s │ Sparse │
│ id │ Default │
│ s │ Sparse │
│ id │ Default │
│ s │ Sparse │
│ id │ Default │
│ s │ Sparse │
│ id │ Default │
│ s │ Sparse │
│ id │ Default │
│ s │ Sparse │
└────────┴────────────────────┘
```

View File

@ -17,10 +17,10 @@ Columns:
**Example**
```sql
:) SELECT * FROM system.disks;
SELECT * FROM system.disks;
```
```text
```response
┌─name────┬─path─────────────────┬───free_space─┬──total_space─┬─keep_free_space─┐
│ default │ /var/lib/clickhouse/ │ 276392587264 │ 490652508160 │ 0 │
└─────────┴──────────────────────┴──────────────┴──────────────┴─────────────────┘

View File

@ -15,10 +15,10 @@ Columns:
**Example**
```sql
:) SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
```
```text
```response
Row 1:
──────
name: index_granularity

View File

@ -0,0 +1,42 @@
---
slug: /en/operations/system-tables/moves
---
# moves
The table contains information about in-progress [data part moves](/docs/en/sql-reference/statements/alter/partition#move-partitionpart) of [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) tables. Each data part movement is represented by a single row.
Columns:
- `database` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the database.
- `table` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the table containing the moving data part.
- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — Time elapsed (in seconds) since data part movement started.
- `target_disk_name` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the [disk](/docs/en/operations/system-tables/disks/) to which the data part is moving.
- `target_disk_path` ([String](/docs/en/sql-reference/data-types/string.md)) — Path to the mount point of the [disk](/docs/en/operations/system-tables/disks/) in the file system.
- `part_name` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the data part being moved.
- `part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Data part size.
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Identifier of a thread performing the movement.
**Example**
```sql
SELECT * FROM system.moves
```
```response
┌─database─┬─table─┬─────elapsed─┬─target_disk_name─┬─target_disk_path─┬─part_name─┬─part_size─┬─thread_id─┐
│ default │ test2 │ 1.668056039 │ s3 │ ./disks/s3/ │ all_3_3_0 │ 136 │ 296146 │
└──────────┴───────┴─────────────┴──────────────────┴──────────────────┴───────────┴───────────┴───────────┘
```
**See Also**
- [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine
- [Using Multiple Block Devices for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree#table_engine-mergetree-multiple-volumes)
- [ALTER TABLE ... MOVE PART](/docs/en/sql-reference/statements/alter/partition#move-partitionpart) command

View File

@ -12,10 +12,10 @@ Reads from this table are not parallelized.
**Example**
```sql
:) SELECT * FROM system.numbers LIMIT 10;
SELECT * FROM system.numbers LIMIT 10;
```
```text
```response
┌─number─┐
│ 0 │
│ 1 │

View File

@ -10,10 +10,10 @@ Used for tests.
**Example**
```sql
:) SELECT * FROM system.numbers_mt LIMIT 10;
SELECT * FROM system.numbers_mt LIMIT 10;
```
```text
```response
┌─number─┐
│ 0 │
│ 1 │

View File

@ -12,10 +12,10 @@ This is similar to the `DUAL` table found in other DBMSs.
**Example**
```sql
:) SELECT * FROM system.one LIMIT 10;
SELECT * FROM system.one LIMIT 10;
```
```text
```response
┌─dummy─┐
│ 0 │
└───────┘

View File

@ -20,10 +20,10 @@ Columns:
- `is_all_data_sent` (Int8) Whether all data was sent to the client (in other words, the query has finished on the server).
```sql
:) SELECT * FROM system.processes LIMIT 10 FORMAT Vertical;
SELECT * FROM system.processes LIMIT 10 FORMAT Vertical;
```
```text
```response
Row 1:
──────
is_initial_query: 1

View File

@ -5,7 +5,7 @@ sidebar_label: NLP
title: "[experimental] Natural Language Processing functions"
---
:::warning
:::warning
This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it.
:::
@ -131,3 +131,153 @@ Configuration:
</extension>
</synonyms_extensions>
```
## detectLanguage
Detects the language of the UTF8-encoded input string. The function uses the [CLD2 library](https://github.com/CLD2Owners/cld2) for detection, and it returns the 2-letter ISO language code.
The `detectLanguage` function works best when the input string contains over 200 characters.
**Syntax**
``` sql
detectLanguage('text_to_be_analyzed')
```
**Arguments**
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- The 2-letter ISO code of the detected language
Other possible results:
- `un` = unknown, cannot detect any language.
- `other` = the detected language does not have a 2-letter code.
**Examples**
Query:
```sql
SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. Where theres a will, theres a way.');
```
Result:
```response
fr
```
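As an additional illustration (not part of the original reference; the `feedback` table and its `comment` column are hypothetical), the function can also be applied per row, for example to count rows by detected language:

```sql
-- Hypothetical table `feedback` with a free-text column `comment`
SELECT
    detectLanguage(comment) AS lang,
    count() AS cnt
FROM feedback
GROUP BY lang
ORDER BY cnt DESC;
```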
## detectLanguageMixed
Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes mapped to the percentage of the text in each detected language.
**Syntax**
``` sql
detectLanguageMixed('text_to_be_analyzed')
```
**Arguments**
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are the percentage of text found for that language
**Examples**
Query:
```sql
SELECT detectLanguageMixed('二兎を追う者は一兎をも得ず二兎を追う者は一兎をも得ず A vaincre sans peril, on triomphe sans gloire.');
```
Result:
```response
┌─detectLanguageMixed()─┐
│ {'ja':0.62,'fr':0.36} │
└───────────────────────┘
```
## detectLanguageUnknown
Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32.
**Syntax**
``` sql
detectLanguageUnknown('text_to_be_analyzed')
```
**Arguments**
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- The 2-letter ISO code of the detected language
Other possible results:
- `un` = unknown, cannot detect any language.
- `other` = the detected language does not have a 2-letter code.
**Examples**
Query:
```sql
SELECT detectLanguageUnknown('Ich bleibe für ein paar Tage.');
```
Result:
```response
┌─detectLanguageUnknown('Ich bleibe für ein paar Tage.')─┐
│ de │
└────────────────────────────────────────────────────────┘
```
## detectCharset
The `detectCharset` function detects the character set of the non-UTF8-encoded input string.
**Syntax**
``` sql
detectCharset('text_to_be_analyzed')
```
**Arguments**
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- A `String` containing the code of the detected character set
**Examples**
Query:
```sql
SELECT detectCharset('Ich bleibe für ein paar Tage.');
```
Result:
```response
┌─detectCharset('Ich bleibe für ein paar Tage.')─┐
│ WINDOWS-1252 │
└────────────────────────────────────────────────┘
```

View File

@ -36,6 +36,18 @@ This query is fully equivalent to using the subquery:
SELECT a, b, c FROM (SELECT ...)
```
## Parameterized View
Parameterized views are similar to normal views, but they can be created with parameters that are not resolved immediately. Such a view can be used as a table function: the name of the view is the function name, and the parameter values are its arguments.
``` sql
CREATE VIEW view AS SELECT * FROM TABLE WHERE Column1={column1:datatype1} and Column2={column2:datatype2} ...
```
The query above creates a view for the table, which can be used as a table function by substituting the parameters as shown below.
``` sql
SELECT * FROM view(column1=value1, column2=value2 ...)
```
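A minimal concrete sketch (the `visits` table, its `year` column, and the view name are hypothetical and used only for illustration):

``` sql
-- Assumes a hypothetical table `visits` with a UInt16 column `year`
CREATE VIEW visits_by_year AS
SELECT *
FROM visits
WHERE year = {year:UInt16};

-- The view is then used as a table function, passing the parameter by name
SELECT count()
FROM visits_by_year(year=2022);
```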
## Materialized View
``` sql

View File

@ -111,6 +111,10 @@ This will also create system tables even if message queue is empty.
Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeper.
## RELOAD USERS
Reloads all access storages, including users.xml, the local disk access storage, and the replicated (in ZooKeeper) access storage. Note that `SYSTEM RELOAD CONFIG` only reloads the users.xml access storage.
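For example, to pick up changes made to `users.xml` on disk without reloading the whole configuration (illustrative only):

```sql
SYSTEM RELOAD USERS
```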
## SHUTDOWN
Normally shuts down ClickHouse (like `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`)

View File

@ -128,6 +128,56 @@ Result:
└────────────────────────────┘
```
## Defining and Using Query Parameters
Query parameters can be defined using the syntax `param_name=value`, where `name` is the name of the parameter. Parameters can be defined using the `SET` command, or from the command line using `--param`.
To retrieve a query parameter, you specify the name of the parameter along with its data type surrounded by curly braces:
```sql
{name:datatype}
```
For example, the following SQL defines parameters named `a`, `b`, `c` and `d` - each of a different data type:
```sql
SET param_a = 13, param_b = 'str';
SET param_c = '2022-08-04 18:30:53';
SET param_d = {'10': [11, 12], '13': [14, 15]};
SELECT
{a: UInt32},
{b: String},
{c: DateTime},
{d: Map(String, Array(UInt8))};
```
Result:
```response
13 str 2022-08-04 18:30:53 {'10':[11,12],'13':[14,15]}
```
If you are using `clickhouse-client`, the parameters are specified as `--param_name=value`. For example, the following parameter has the name `message` and it is retrieved as a `String`:
```bash
clickhouse-client --param_message='hello' --query="SELECT {message: String}"
```
Result:
```response
hello
```
If the query parameter represents the name of a database, table, function or other identifier, use `Identifier` for its type. For example, the following query returns rows from a table named `uk_price_paid`:
```sql
SET param_mytablename = 'uk_price_paid';
SELECT * FROM {mytablename:Identifier};
```
## Functions
Function calls are written like an identifier with a list of arguments (possibly empty) in round brackets. In contrast to standard SQL, the brackets are required, even for an empty argument list. Example: `now()`.

View File

@ -0,0 +1,51 @@
---
slug: /en/sql-reference/table-functions/deltalake
sidebar_label: DeltaLake
---
# deltaLake Table Function
Provides a read-only table-like interface to [Delta Lake](https://github.com/delta-io/delta) tables in Amazon S3.
## Syntax
``` sql
deltaLake(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```
## Arguments
- `url` — Bucket URL with the path to an existing Delta Lake table in S3.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are taken from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3).
- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, compression will be autodetected by the file extension.
**Returned value**
A table with the specified structure for reading data in the specified Delta Lake table in S3.
**Examples**
Selecting rows from the table in S3 `https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/`:
``` sql
SELECT
URL,
UserAgent
FROM deltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/')
WHERE URL IS NOT NULL
LIMIT 2
```
``` response
┌─URL───────────────────────────────────────────────────────────────────┬─UserAgent─┐
│ http://auto.ria.ua/search/index.kz/jobinmoscow/detail/55089/hasimages │ 1 │
│ http://auto.ria.ua/search/index.kz/jobinmoscow.ru/gosushi │ 1 │
└───────────────────────────────────────────────────────────────────────┴───────────┘
```
**See Also**
- [DeltaLake engine](/docs/en/engines/table-engines/integrations/deltalake.md)

View File

@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext
**Query:**
``` sql
:) select * from format(JSONEachRow,
SELECT * FROM format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
@ -38,7 +38,7 @@ $$)
**Result:**
```text
```response
┌───b─┬─a─────┐
│ 111 │ Hello │
│ 123 │ World │
@ -49,8 +49,7 @@ $$)
**Query:**
```sql
:) desc format(JSONEachRow,
DESC format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
@ -61,7 +60,7 @@ $$)
**Result:**
```text
```response
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ b │ Nullable(Float64) │ │ │ │ │ │
│ a │ Nullable(String) │ │ │ │ │ │

View File

@ -0,0 +1,31 @@
---
slug: /en/sql-reference/table-functions/hudi
sidebar_label: Hudi
---
# hudi Table Function
Provides a read-only table-like interface to Apache [Hudi](https://hudi.apache.org/) tables in Amazon S3.
## Syntax
``` sql
hudi(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```
## Arguments
- `url` — Bucket url with the path to an existing Hudi table in S3.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are taken from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3).
- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, compression will be autodetected by the file extension.
**Returned value**
A table with the specified structure for reading data in the specified Hudi table in S3.
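**Example**

An illustrative query (the bucket URL below is a placeholder, not a real public dataset):

``` sql
SELECT count()
FROM hudi('https://my-bucket.s3.amazonaws.com/hudi_table/')
```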
**See Also**
- [Hudi engine](/docs/en/engines/table-engines/integrations/hudi.md)

View File

@ -22,6 +22,9 @@ ClickHouse предоставляет возможность аутентифи
- `realm` &mdash; обеспечивает фильтрацию по реалм (realm). Пользователям, чей реалм не совпадает с указанным, будет отказано в аутентификации.
- Это опциональный параметр, при его отсутствии фильтр по реалм применяться не будет.
- `keytab` &mdash; задаёт путь к файлу keytab.
- Это опциональный параметр, при его отсутствии путь к файлу keytab должен быть задан в переменной окружения `KRB5_KTNAME`.
Примеры, как должен выглядеть файл `config.xml`:
```xml

View File

@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext
**Query:**
``` sql
:) select * from format(JSONEachRow,
SELECT * FROM format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
@ -38,7 +38,7 @@ $$)
**Result:**
```text
```response
┌───b─┬─a─────┐
│ 111 │ Hello │
│ 123 │ World │
@ -49,8 +49,7 @@ $$)
**Query:**
```sql
:) desc format(JSONEachRow,
DESC format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
@ -61,7 +60,7 @@ $$)
**Result:**
```text
```response
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ b │ Nullable(Float64) │ │ │ │ │ │
│ a │ Nullable(String) │ │ │ │ │ │

View File

@ -1,45 +1,96 @@
#!/usr/bin/env python3
from pathlib import Path
import argparse
import logging
import os
import shutil
import subprocess
import sys
import livereload
import redirects
import website
def write_redirect_html(output_path: Path, to_url: str) -> None:
output_dir = output_path.parent
output_dir.mkdir(parents=True, exist_ok=True)
output_path.write_text(
f"""<!--[if IE 6]> Redirect: {to_url} <![endif]-->
<!DOCTYPE HTML>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<meta http-equiv="refresh" content="0; url={to_url}">
<script type="text/javascript">
window.location.href = "{to_url}";
</script>
<title>Page Redirection</title>
</head>
<body>
If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
</body>
</html>"""
)
def build(args):
if os.path.exists(args.output_dir):
def build_static_redirects(output_dir: Path):
for static_redirect in [
("benchmark.html", "/benchmark/dbms/"),
("benchmark_hardware.html", "/benchmark/hardware/"),
(
"tutorial.html",
"/docs/en/getting_started/tutorial/",
),
(
"reference_en.html",
"/docs/en/single/",
),
(
"reference_ru.html",
"/docs/ru/single/",
),
(
"docs/index.html",
"/docs/en/",
),
]:
write_redirect_html(output_dir / static_redirect[0], static_redirect[1])
def build(root_dir: Path, output_dir: Path):
if output_dir.exists():
shutil.rmtree(args.output_dir)
if not args.skip_website:
website.build_website(args)
redirects.build_static_redirects(args)
(output_dir / "data").mkdir(parents=True)
logging.info("Building website")
# This file can be requested to check for available ClickHouse releases.
shutil.copy2(
root_dir / "utils" / "list-versions" / "version_date.tsv",
output_dir / "data" / "version_date.tsv",
)
# This file can be requested to install ClickHouse.
shutil.copy2(
root_dir / "docs" / "_includes" / "install" / "universal.sh",
output_dir / "data" / "install.sh",
)
build_static_redirects(output_dir)
if __name__ == "__main__":
os.chdir(os.path.join(os.path.dirname(__file__), ".."))
root_dir = Path(__file__).parent.parent.parent
docs_dir = root_dir / "docs"
# A root path to ClickHouse source code.
src_dir = ".."
website_dir = os.path.join(src_dir, "website")
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("--lang", default="en,ru,zh,ja")
arg_parser.add_argument("--theme-dir", default=website_dir)
arg_parser.add_argument("--website-dir", default=website_dir)
arg_parser.add_argument("--src-dir", default=src_dir)
arg_parser.add_argument("--output-dir", default="build")
arg_parser.add_argument("--nav-limit", type=int, default="0")
arg_parser.add_argument("--skip-multi-page", action="store_true")
arg_parser.add_argument("--skip-website", action="store_true")
arg_parser.add_argument("--htmlproofer", action="store_true")
arg_parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
arg_parser.add_argument(
"--output-dir",
type=Path,
default=docs_dir / "build",
help="path to the output dir",
)
arg_parser.add_argument("--livereload", type=int, default="0")
arg_parser.add_argument("--verbose", action="store_true")
@ -49,26 +100,9 @@ if __name__ == "__main__":
level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr
)
logging.getLogger("MARKDOWN").setLevel(logging.INFO)
args.rev = (
subprocess.check_output("git rev-parse HEAD", shell=True)
.decode("utf-8")
.strip()
)
args.rev_short = (
subprocess.check_output("git rev-parse --short HEAD", shell=True)
.decode("utf-8")
.strip()
)
args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}"
build(args)
build(root_dir, args.output_dir)
if args.livereload:
new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")]
new_args = sys.executable + " " + " ".join(new_args)
server = livereload.Server()
server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload)
sys.exit(0)

View File

@ -1,22 +0,0 @@
#!/bin/bash
# Fixes missing documentation in other languages
# by putting relative symbolic links to the original doc file.
BASE_DIR=$(dirname $(readlink -f $0))
function do_make_links()
{
set -x
langs=(en zh ru ja)
src_file="$1"
for lang in "${langs[@]}"
do
dst_file="${src_file/\/en\///${lang}/}"
mkdir -p $(dirname "${dst_file}")
ln -sr "${src_file}" "${dst_file}" 2>/dev/null
done
}
export -f do_make_links
find "${BASE_DIR}/../en" -iname '*.md' -exec /bin/bash -c 'do_make_links "{}"' \;

View File

@ -1,142 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import datetime
import os
import subprocess
import jinja2
import markdown.inlinepatterns
import markdown.extensions
import markdown.util
import macros.plugin
import slugify as slugify_impl
def slugify(value, separator):
return slugify_impl.slugify(
value, separator=separator, word_boundary=True, save_order=True
)
MARKDOWN_EXTENSIONS = [
"mdx_clickhouse",
"admonition",
"attr_list",
"def_list",
"codehilite",
"nl2br",
"sane_lists",
"pymdownx.details",
"pymdownx.magiclink",
"pymdownx.superfences",
"extra",
{"toc": {"permalink": True, "slugify": slugify}},
]
class ClickHouseLinkMixin(object):
def handleMatch(self, m, data):
try:
el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data)
except IndexError:
return
if el is not None:
href = el.get("href") or ""
is_external = href.startswith("http:") or href.startswith("https:")
if is_external:
if not href.startswith("https://clickhouse.com"):
el.set("rel", "external nofollow noreferrer")
return el, start, end
class ClickHouseAutolinkPattern(
ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor
):
pass
class ClickHouseLinkPattern(
ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor
):
pass
class ClickHousePreprocessor(markdown.util.Processor):
def run(self, lines):
for line in lines:
if "<!--hide-->" not in line:
yield line
class ClickHouseMarkdown(markdown.extensions.Extension):
def extendMarkdown(self, md, md_globals):
md.preprocessors["clickhouse"] = ClickHousePreprocessor()
md.inlinePatterns["link"] = ClickHouseLinkPattern(
markdown.inlinepatterns.LINK_RE, md
)
md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern(
markdown.inlinepatterns.AUTOLINK_RE, md
)
def makeExtension(**kwargs):
return ClickHouseMarkdown(**kwargs)
def get_translations(dirname, lang):
import babel.support
return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"])
class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
disabled = False
def on_config(self, config):
super(PatchedMacrosPlugin, self).on_config(config)
self.env.comment_start_string = "{##"
self.env.comment_end_string = "##}"
self.env.loader = jinja2.FileSystemLoader(
[
os.path.join(config.data["site_dir"]),
os.path.join(config.data["extra"]["includes_dir"]),
]
)
def on_env(self, env, config, files):
import util
env.add_extension("jinja2.ext.i18n")
dirname = os.path.join(config.data["theme"].dirs[0], "locale")
lang = config.data["theme"]["language"]
env.install_gettext_translations(get_translations(dirname, lang), newstyle=True)
util.init_jinja2_filters(env)
return env
def render(self, markdown):
if not self.disabled:
return self.render_impl(markdown)
else:
return markdown
def on_page_markdown(self, markdown, page, config, files):
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(
markdown, page, config, files
)
if os.path.islink(page.file.abs_src_path):
lang = config.data["theme"]["language"]
page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1)
return markdown
def render_impl(self, markdown):
md_template = self.env.from_string(markdown)
return md_template.render(**self.variables)
macros.plugin.MacrosPlugin = PatchedMacrosPlugin

View File

@ -1,53 +0,0 @@
import os
def write_redirect_html(out_path, to_url):
out_dir = os.path.dirname(out_path)
try:
os.makedirs(out_dir)
except OSError:
pass
with open(out_path, "w") as f:
f.write(
f"""<!--[if IE 6]> Redirect: {to_url} <![endif]-->
<!DOCTYPE HTML>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<meta http-equiv="refresh" content="0; url={to_url}">
<script type="text/javascript">
window.location.href = "{to_url}";
</script>
<title>Page Redirection</title>
</head>
<body>
If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
</body>
</html>"""
)
def build_static_redirects(args):
for static_redirect in [
("benchmark.html", "/benchmark/dbms/"),
("benchmark_hardware.html", "/benchmark/hardware/"),
(
"tutorial.html",
"/docs/en/getting_started/tutorial/",
),
(
"reference_en.html",
"/docs/en/single/",
),
(
"reference_ru.html",
"/docs/ru/single/",
),
(
"docs/index.html",
"/docs/en/",
),
]:
write_redirect_html(
os.path.join(args.output_dir, static_redirect[0]), static_redirect[1]
)

View File

@ -25,7 +25,10 @@ then
# Add files.
cp -R "${BUILD_DIR}"/* .
echo -n "${BASE_DOMAIN}" > CNAME
echo -n "" > README.md
cat > README.md << 'EOF'
## This repo is the source for https://content.clickhouse.com
It's built in [the action](https://github.com/ClickHouse/ClickHouse/blob/master/.github/workflows/docs_release.yml) in the DocsRelease job.
EOF
echo -n "" > ".nojekyll"
cp "${BASE_DIR}/../../LICENSE" .
git add ./*

View File

@ -1,30 +1 @@
Babel==2.9.1
Jinja2==3.0.3
Markdown==3.3.2
MarkupSafe==2.1.1
PyYAML==6.0
Pygments>=2.12.0
beautifulsoup4==4.9.1
click==7.1.2
ghp_import==2.1.0
importlib_metadata==4.11.4
jinja2-highlight==0.6.1
livereload==2.6.3
mergedeep==1.3.4
mkdocs-macros-plugin==0.4.20
mkdocs-macros-test==0.1.0
mkdocs-material==8.2.15
mkdocs==1.3.0
mkdocs_material_extensions==1.0.3
packaging==21.3
pymdown_extensions==9.4
pyparsing==3.0.9
python-slugify==4.0.1
python_dateutil==2.8.2
pytz==2022.1
six==1.15.0
soupsieve==2.3.2
termcolor==1.1.0
text_unidecode==1.3
tornado==6.1
zipp==3.8.0

View File

@ -1,136 +0,0 @@
import collections
import contextlib
import datetime
import multiprocessing
import os
import shutil
import sys
import socket
import tempfile
import threading
import jinja2
import yaml
@contextlib.contextmanager
def temp_dir():
path = tempfile.mkdtemp(dir=os.environ.get("TEMP"))
try:
yield path
finally:
shutil.rmtree(path)
@contextlib.contextmanager
def cd(new_cwd):
old_cwd = os.getcwd()
os.chdir(new_cwd)
try:
yield
finally:
os.chdir(old_cwd)
def get_free_port():
with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
s.bind(("", 0))
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
return s.getsockname()[1]
def run_function_in_parallel(func, args_list, threads=False):
processes = []
exit_code = 0
for task in args_list:
cls = threading.Thread if threads else multiprocessing.Process
processes.append(cls(target=func, args=task))
processes[-1].start()
for process in processes:
process.join()
if not threads:
if process.exitcode and not exit_code:
exit_code = process.exitcode
if exit_code:
sys.exit(exit_code)
def read_md_file(path):
in_meta = False
meta = {}
meta_text = []
content = []
if os.path.exists(path):
with open(path, "r") as f:
for line in f:
if line.startswith("---"):
if in_meta:
in_meta = False
meta = yaml.full_load("".join(meta_text))
else:
in_meta = True
else:
if in_meta:
meta_text.append(line)
else:
content.append(line)
return meta, "".join(content)
def write_md_file(path, meta, content):
dirname = os.path.dirname(path)
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(path, "w") as f:
if meta:
print("---", file=f)
yaml.dump(meta, f)
print("---", file=f)
if not content.startswith("\n"):
print("", file=f)
f.write(content)
def represent_ordereddict(dumper, data):
value = []
for item_key, item_value in data.items():
node_key = dumper.represent_data(item_key)
node_value = dumper.represent_data(item_value)
value.append((node_key, node_value))
return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value)
yaml.add_representer(collections.OrderedDict, represent_ordereddict)
def init_jinja2_filters(env):
import website
chunk_size = 10240
env.filters["chunks"] = lambda line: [
line[i : i + chunk_size] for i in range(0, len(line), chunk_size)
]
env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime(
d, "%Y-%m-%d"
).strftime("%a, %d %b %Y %H:%M:%S GMT")
def init_jinja2_env(args):
import mdx_clickhouse
env = jinja2.Environment(
loader=jinja2.FileSystemLoader(
[args.website_dir, os.path.join(args.src_dir, "docs", "_includes")]
),
extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"],
)
env.extend(jinja2_highlight_cssclass="syntax p-3 my-3")
translations_dir = os.path.join(args.website_dir, "locale")
env.install_gettext_translations(
mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True
)
init_jinja2_filters(env)
return env

View File

@ -1,63 +0,0 @@
import hashlib
import json
import logging
import os
import shutil
import subprocess
import util
def build_website(args):
logging.info("Building website")
env = util.init_jinja2_env(args)
shutil.copytree(
args.website_dir,
args.output_dir,
ignore=shutil.ignore_patterns(
"*.md",
"*.sh",
"*.css",
"*.json",
"js/*.js",
"build",
"docs",
"public",
"node_modules",
"src",
"templates",
"locale",
".gitkeep",
),
)
# This file can be requested to check for available ClickHouse releases.
shutil.copy2(
os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"),
os.path.join(args.output_dir, "data", "version_date.tsv"),
)
# This file can be requested to install ClickHouse.
shutil.copy2(
os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"),
os.path.join(args.output_dir, "data", "install.sh"),
)
for root, _, filenames in os.walk(args.output_dir):
for filename in filenames:
if filename == "main.html":
continue
path = os.path.join(root, filename)
if not filename.endswith(".html"):
continue
logging.info("Processing %s", path)
with open(path, "rb") as f:
content = f.read().decode("utf-8")
template = env.from_string(content)
content = template.render(args.__dict__)
with open(path, "wb") as f:
f.write(content.encode("utf-8"))

View File

@ -69,7 +69,9 @@ ORDER BY key
向其中插入数据:
:) INSERT INTO summtt Values(1,1),(1,2),(2,1)
``` sql
INSERT INTO summtt Values(1,1),(1,2),(2,1)
```
ClickHouse可能不会完整的汇总所有行[见下文](#data-processing),因此我们在查询中使用了聚合函数 `sum``GROUP BY` 子句。

View File

@ -16,7 +16,7 @@ slug: /zh/operations/system-tables/disks
**示例**
```sql
:) SELECT * FROM system.disks;
SELECT * FROM system.disks;
```
```text

View File

@ -16,10 +16,10 @@ slug: /zh/operations/system-tables/merge_tree_settings
**示例**
```sql
:) SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
```
```text
```response
Row 1:
──────
name: index_granularity

View File

@ -12,10 +12,10 @@ slug: /zh/operations/system-tables/numbers
**示例**
```sql
:) SELECT * FROM system.numbers LIMIT 10;
SELECT * FROM system.numbers LIMIT 10;
```
```text
```response
┌─number─┐
│ 0 │
│ 1 │

View File

@ -12,10 +12,10 @@ slug: /zh/operations/system-tables/one
**示例**
```sql
:) SELECT * FROM system.one LIMIT 10;
SELECT * FROM system.one LIMIT 10;
```
```text
```response
┌─dummy─┐
│ 0 │
└───────┘

View File

@ -19,29 +19,25 @@ slug: /zh/sql-reference/data-types/array
创建数组示例:
:) SELECT array(1, 2) AS x, toTypeName(x)
```sql
SELECT array(1, 2) AS x, toTypeName(x)
```
SELECT
[1, 2] AS x,
toTypeName(x)
```response
┌─x─────┬─toTypeName(array(1, 2))─┐
│ [1,2] │ Array(UInt8) │
└───────┴─────────────────────────┘
```
┌─x─────┬─toTypeName(array(1, 2))─┐
│ [1,2] │ Array(UInt8) │
└───────┴─────────────────────────┘
``` sql
SELECT [1, 2] AS x, toTypeName(x)
```
1 rows in set. Elapsed: 0.002 sec.
:) SELECT [1, 2] AS x, toTypeName(x)
SELECT
[1, 2] AS x,
toTypeName(x)
┌─x─────┬─toTypeName([1, 2])─┐
│ [1,2] │ Array(UInt8) │
└───────┴────────────────────┘
1 rows in set. Elapsed: 0.002 sec.
```response
┌─x─────┬─toTypeName([1, 2])─┐
│ [1,2] │ Array(UInt8) │
└───────┴────────────────────┘
```
## 使用数据类型 {#shi-yong-shu-ju-lei-xing}
@ -50,26 +46,23 @@ ClickHouse会自动检测数组元素,并根据元素计算出存储这些元素
如果 ClickHouse 无法确定数据类型,它将产生异常。当尝试同时创建一个包含字符串和数字的数组时会发生这种情况 (`SELECT array(1, 'a')`)。
自动数据类型检测示例:
```sql
SELECT array(1, 2, NULL) AS x, toTypeName(x)
```
:) SELECT array(1, 2, NULL) AS x, toTypeName(x)
SELECT
[1, 2, NULL] AS x,
toTypeName(x)
┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐
│ [1,2,NULL] │ Array(Nullable(UInt8)) │
└────────────┴───────────────────────────────┘
1 rows in set. Elapsed: 0.002 sec.
```response
┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐
│ [1,2,NULL] │ Array(Nullable(UInt8)) │
└────────────┴───────────────────────────────┘
```
如果您尝试创建不兼容的数据类型数组ClickHouse 将引发异常:
:) SELECT array(1, 'a')
```sql
SELECT array(1, 'a')
```
SELECT [1, 'a']
Received exception from server (version 1.1.54388):
Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not.
0 rows in set. Elapsed: 0.246 sec.
```response
Received exception from server (version 1.1.54388):
Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not.
```

View File

@ -20,49 +20,64 @@ slug: /zh/sql-reference/data-types/enum
这个 `x` 列只能存储类型定义中列出的值:`'hello'`或`'world'`。如果您尝试保存任何其他值ClickHouse 抛出异常。
:) INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello')
```sql
INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello')
```
INSERT INTO t_enum VALUES
```response
Ok.
Ok.
3 rows in set. Elapsed: 0.002 sec.
```
3 rows in set. Elapsed: 0.002 sec.
```sql
INSERT INTO t_enum VALUES('a')
```
:) insert into t_enum values('a')
INSERT INTO t_enum VALUES
Exception on client:
Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2)
```response
Exception on client:
Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2)
```
当您从表中查询数据时ClickHouse 从 `Enum` 中输出字符串值。
SELECT * FROM t_enum
```sql
SELECT * FROM t_enum
```
┌─x─────┐
│ hello │
│ world │
│ hello │
└───────┘
```response
┌─x─────┐
│ hello │
│ world │
│ hello │
└───────┘
```
如果需要看到对应行的数值,则必须将 `Enum` 值转换为整数类型。
SELECT CAST(x, 'Int8') FROM t_enum
```sql
SELECT CAST(x, 'Int8') FROM t_enum
```
┌─CAST(x, 'Int8')─┐
│ 1 │
│ 2 │
│ 1 │
└─────────────────┘
```response
┌─CAST(x, 'Int8')─┐
│ 1 │
│ 2 │
│ 1 │
└─────────────────┘
```
在查询中创建枚举值,您还需要使用 `CAST`
SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))
```sql
SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))
```
┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐
│ Enum8('a' = 1, 'b' = 2) │
└──────────────────────────────────────────────────────┘
```response
┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐
│ Enum8('a' = 1, 'b' = 2) │
└──────────────────────────────────────────────────────┘
```
## 规则及用法 {#gui-ze-ji-yong-fa}
@ -72,15 +87,19 @@ slug: /zh/sql-reference/data-types/enum
`Enum` 包含在 [可为空](nullable.md) 类型中。因此,如果您使用此查询创建一个表
CREATE TABLE t_enum_nullable
(
x Nullable( Enum8('hello' = 1, 'world' = 2) )
)
ENGINE = TinyLog
```sql
CREATE TABLE t_enum_nullable
(
x Nullable( Enum8('hello' = 1, 'world' = 2) )
)
ENGINE = TinyLog
```
不仅可以存储 `'hello'``'world'` ,还可以存储 `NULL`
INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL)
```sql
INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL)
```
在内存中,`Enum` 列的存储方式与相应数值的 `Int8``Int16` 相同。

View File

@ -9,11 +9,11 @@ slug: /zh/sql-reference/data-types/special-data-types/nothing
`Nothing` 类型也可以用来表示空数组:
``` bash
:) SELECT toTypeName(array())
SELECT toTypeName([])
```sql
SELECT toTypeName(array())
```
```response
┌─toTypeName(array())─┐
│ Array(Nothing) │
└─────────────────────┘

View File

@ -17,17 +17,15 @@ slug: /zh/sql-reference/data-types/tuple
创建元组的示例:
:) SELECT tuple(1,'a') AS x, toTypeName(x)
```sql
SELECT tuple(1,'a') AS x, toTypeName(x)
```
SELECT
(1, 'a') AS x,
toTypeName(x)
┌─x───────┬─toTypeName(tuple(1, 'a'))─┐
│ (1,'a') │ Tuple(UInt8, String) │
└─────────┴───────────────────────────┘
1 rows in set. Elapsed: 0.021 sec.
```response
┌─x───────┬─toTypeName(tuple(1, 'a'))─┐
│ (1,'a') │ Tuple(UInt8, String) │
└─────────┴───────────────────────────┘
```
## 元组中的数据类型 {#yuan-zu-zhong-de-shu-ju-lei-xing}
@ -35,14 +33,12 @@ slug: /zh/sql-reference/data-types/tuple
自动数据类型检测示例:
SELECT tuple(1, NULL) AS x, toTypeName(x)
```sql
SELECT tuple(1, NULL) AS x, toTypeName(x)
```
SELECT
(1, NULL) AS x,
toTypeName(x)
┌─x────────┬─toTypeName(tuple(1, NULL))──────┐
│ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │
└──────────┴─────────────────────────────────┘
1 rows in set. Elapsed: 0.002 sec.
```response
┌─x────────┬─toTypeName(tuple(1, NULL))──────┐
│ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │
└──────────┴─────────────────────────────────┘
```

View File

@ -22,24 +22,24 @@ slug: /zh/sql-reference/functions/functions-for-nulls
存在以下内容的表
┌─x─┬────y─┐
│ 1 │ ᴺᵁᴸᴸ │
│ 2 │ 3 │
└───┴──────┘
```response
┌─x─┬────y─┐
│ 1 │ ᴺᵁᴸᴸ │
│ 2 │ 3 │
└───┴──────┘
```
对其进行查询
:) SELECT x FROM t_null WHERE isNull(y)
```sql
SELECT x FROM t_null WHERE isNull(y)
```
SELECT x
FROM t_null
WHERE isNull(y)
┌─x─┐
│ 1 │
└───┘
1 rows in set. Elapsed: 0.010 sec.
```response
┌─x─┐
│ 1 │
└───┘
```
## isNotNull {#isnotnull}
@ -60,24 +60,24 @@ slug: /zh/sql-reference/functions/functions-for-nulls
存在以下内容的表
┌─x─┬────y─┐
│ 1 │ ᴺᵁᴸᴸ │
│ 2 │ 3 │
└───┴──────┘
```response
┌─x─┬────y─┐
│ 1 │ ᴺᵁᴸᴸ │
│ 2 │ 3 │
└───┴──────┘
```
对其进行查询
:) SELECT x FROM t_null WHERE isNotNull(y)
```sql
SELECT x FROM t_null WHERE isNotNull(y)
```
SELECT x
FROM t_null
WHERE isNotNull(y)
┌─x─┐
│ 2 │
└───┘
1 rows in set. Elapsed: 0.010 sec.
```response
┌─x─┐
│ 2 │
└───┘
```
## 合并 {#coalesce}
@ -98,26 +98,27 @@ slug: /zh/sql-reference/functions/functions-for-nulls
考虑可以指定多种联系客户的方式的联系人列表。
┌─name─────┬─mail─┬─phone─────┬──icq─┐
│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │
│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└──────────┴──────┴───────────┴──────┘
```response
┌─name─────┬─mail─┬─phone─────┬──icq─┐
│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │
│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└──────────┴──────┴───────────┴──────┘
```
`mail`和`phone`字段是String类型但`icq`字段是`UInt32`,所以它需要转换为`String`。
从联系人列表中获取客户的第一个可用联系方式:
:) SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook
```sql
SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook
```
SELECT coalesce(mail, phone, CAST(icq, 'Nullable(String)'))
FROM aBook
┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐
│ client 1 │ 123-45-67 │
│ client 2 │ ᴺᵁᴸᴸ │
└──────────┴──────────────────────────────────────────────────────┘
2 rows in set. Elapsed: 0.006 sec.
```response
┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐
│ client 1 │ 123-45-67 │
│ client 2 │ ᴺᵁᴸᴸ │
└──────────┴──────────────────────────────────────────────────────┘
```
## ifNull {#ifnull}

View File

@ -33,7 +33,7 @@ slug: /zh/sql-reference/functions/other-functions
SELECT 'some/long/path/to/file' AS a, basename(a)
```
``` text
```response
┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐
│ some\long\path\to\file │ file │
└────────────────────────┴────────────────────────────────────────┘
@ -43,7 +43,7 @@ SELECT 'some/long/path/to/file' AS a, basename(a)
SELECT 'some\\long\\path\\to\\file' AS a, basename(a)
```
``` text
```response
┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐
│ some\long\path\to\file │ file │
└────────────────────────┴────────────────────────────────────────┘
@ -53,7 +53,7 @@ SELECT 'some\\long\\path\\to\\file' AS a, basename(a)
SELECT 'some-file-name' AS a, basename(a)
```
``` text
```response
┌─a──────────────┬─basename('some-file-name')─┐
│ some-file-name │ some-file-name │
└────────────────┴────────────────────────────┘
@ -398,23 +398,25 @@ FROM
**`toTypeName ' 与 ' toColumnTypeName`的区别示例**
:) select toTypeName(cast('2018-01-01 01:02:03' AS DateTime))
```sql
SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))
```
SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))
```response
┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐
│ DateTime │
└─────────────────────────────────────────────────────┘
```
┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐
│ DateTime │
└─────────────────────────────────────────────────────┘
```sql
SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))
```
1 rows in set. Elapsed: 0.008 sec.
:) select toColumnTypeName(cast('2018-01-01 01:02:03' AS DateTime))
SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))
┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐
│ Const(UInt32) │
└───────────────────────────────────────────────────────────┘
```response
┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐
│ Const(UInt32) │
└───────────────────────────────────────────────────────────┘
```
该示例显示`DateTime`数据类型作为`Const(UInt32)`存储在内存中。
@ -460,26 +462,25 @@ FROM
**示例**
:) SELECT defaultValueOfArgumentType( CAST(1 AS Int8) )
```sql
SELECT defaultValueOfArgumentType(CAST(1, 'Int8'))
```
SELECT defaultValueOfArgumentType(CAST(1, 'Int8'))
```response
┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐
│ 0 │
└─────────────────────────────────────────────┘
```
┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐
│ 0 │
└─────────────────────────────────────────────┘
1 rows in set. Elapsed: 0.002 sec.
:) SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) )
SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))
┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐
│ ᴺᵁᴸᴸ │
└───────────────────────────────────────────────────────┘
1 rows in set. Elapsed: 0.002 sec.
```sql
SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))
```
```response
┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐
│ ᴺᵁᴸᴸ │
└───────────────────────────────────────────────────────┘
```
## indexHint {#indexhint}
输出符合索引选择范围内的所有数据,同时不实用参数中的表达式进行过滤。
@ -496,7 +497,8 @@ FROM
```
SELECT count() FROM ontime
```
```response
┌─count()─┐
│ 4276457 │
└─────────┘
@ -506,9 +508,11 @@ SELECT count() FROM ontime
对该表进行如下的查询:
```sql
SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k
```
:) SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k
```response
SELECT
FlightDate AS k,
count()
@ -530,9 +534,11 @@ ORDER BY k ASC
在这个查询中由于没有使用索引所以ClickHouse将处理整个表的所有数据(`Processed 4.28 million rows`)。使用下面的查询尝试使用索引进行查询:
```sql
SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k
```
:) SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k
```response
SELECT
FlightDate AS k,
count()
@ -552,9 +558,11 @@ ORDER BY k ASC
现在将表达式`k = '2017-09-15'`传递给`indexHint`函数:
```sql
SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k
```
:) SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k
```response
SELECT
FlightDate AS k,
count()

View File

@ -21,13 +21,13 @@ UUID类型的值。
此示例演示如何在表中创建UUID类型的列并对其写入数据。
``` sql
:) CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog
:) INSERT INTO t_uuid SELECT generateUUIDv4()
:) SELECT * FROM t_uuid
```sql
CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog
INSERT INTO t_uuid SELECT generateUUIDv4()
SELECT * FROM t_uuid
```
```response
┌────────────────────────────────────x─┐
│ f4bf890f-f9dc-4332-ad5c-0c18e73f28e9 │
└──────────────────────────────────────┘
@ -47,9 +47,11 @@ UUID类型的值
**使用示例**
``` sql
:) SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid
```sql
SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid
```
```response
┌─────────────────────────────────uuid─┐
│ 61f0c404-5cb3-11e7-907b-a6006ad3dba0 │
└──────────────────────────────────────┘
@ -70,10 +72,12 @@ UUIDStringToNum(String)
**使用示例**
``` sql
:) SELECT
SELECT
'612f3c40-5d3b-217e-707b-6a546a3d7b29' AS uuid,
UUIDStringToNum(uuid) AS bytes
```
```response
┌─uuid─────────────────────────────────┬─bytes────────────┐
│ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ a/<@];!~p{jTj={) │
└──────────────────────────────────────┴──────────────────┘
@ -97,7 +101,8 @@ UUIDNumToString(FixedString(16))
SELECT
'a/<@];!~p{jTj={)' AS bytes,
UUIDNumToString(toFixedString(bytes, 16)) AS uuid
```
```response
┌─bytes────────────┬─uuid─────────────────────────────────┐
│ a/<@];!~p{jTj={) │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │
└──────────────────┴──────────────────────────────────────┘

View File

@ -143,7 +143,7 @@ SELECT
FROM test.Orders;
```
``` text
``` response
┌─OrderYear─┬─OrderMonth─┬─OrderDay─┬─OrderHour─┬─OrderMinute─┬─OrderSecond─┐
│ 2008 │ 10 │ 11 │ 13 │ 23 │ 44 │
└───────────┴────────────┴──────────┴───────────┴─────────────┴─────────────┘
@ -161,7 +161,7 @@ FROM test.Orders;
SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR
```
``` text
``` response
┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐
│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │
└─────────────────────┴────────────────────────────────────────────────────────┘
@ -226,18 +226,14 @@ ClickHouse 支持 `IS NULL` 和 `IS NOT NULL` 。
<!-- -->
``` bash
:) SELECT x+100 FROM t_null WHERE y IS NULL
SELECT x + 100
FROM t_null
WHERE isNull(y)
``` sql
SELECT x+100 FROM t_null WHERE y IS NULL
```
``` response
┌─plus(x, 100)─┐
│ 101 │
└──────────────┘
1 rows in set. Elapsed: 0.002 sec.
```
### IS NOT NULL {#is-not-null}
@ -249,16 +245,12 @@ WHERE isNull(y)
<!-- -->
``` bash
:) SELECT * FROM t_null WHERE y IS NOT NULL
SELECT *
FROM t_null
WHERE isNotNull(y)
``` sql
SELECT * FROM t_null WHERE y IS NOT NULL
```
``` response
┌─x─┬─y─┐
│ 2 │ 3 │
└───┴───┘
1 rows in set. Elapsed: 0.002 sec.
```

View File

@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext
**Query:**
``` sql
:) select * from format(JSONEachRow,
SELECT * FROM format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
@ -38,7 +38,7 @@ $$)
**Result:**
```text
```response
┌───b─┬─a─────┐
│ 111 │ Hello │
│ 123 │ World │
@ -49,8 +49,7 @@ $$)
**Query:**
```sql
:) desc format(JSONEachRow,
DESC format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
@ -61,7 +60,7 @@ $$)
**Result:**
```text
```response
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ b │ Nullable(Float64) │ │ │ │ │ │
│ a │ Nullable(String) │ │ │ │ │ │

View File

@ -10,7 +10,6 @@
#include <optional>
#include <utility>
namespace DB
{
@ -223,6 +222,7 @@ void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util::
params.realm = config.getString("kerberos.realm", "");
params.principal = config.getString("kerberos.principal", "");
params.keytab = config.getString("kerberos.keytab", "");
}
}

View File

@ -6,6 +6,7 @@
#include <mutex>
#include <tuple>
#include <filesystem>
namespace DB
@ -261,6 +262,15 @@ void GSSAcceptorContext::initHandles()
resetHandles();
if (!params.keytab.empty())
{
if (!std::filesystem::exists(params.keytab))
throw Exception("Keytab file not found", ErrorCodes::BAD_ARGUMENTS);
if (krb5_gss_register_acceptor_identity(params.keytab.c_str()))
throw Exception("Failed to register keytab file", ErrorCodes::BAD_ARGUMENTS);
}
if (!params.principal.empty())
{
if (!params.realm.empty())

View File

@ -9,6 +9,7 @@
#if USE_KRB5
# include <gssapi/gssapi.h>
# include <gssapi/gssapi_ext.h>
# include <gssapi/gssapi_krb5.h>
# define MAYBE_NORETURN
#else
# define MAYBE_NORETURN [[noreturn]]
@ -28,6 +29,7 @@ public:
String mechanism = "1.2.840.113554.1.2.2"; // OID: krb5
String principal;
String realm;
String keytab;
};
explicit GSSAcceptorContext(const Params & params_);

View File

@ -214,6 +214,11 @@ IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const
}
QueryTreeNodePtr IQueryTreeNode::clone() const
{
return cloneAndReplace({});
}
QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const ReplacementMap & replacement_map) const
{
/** Clone tree with this node as root.
*
@ -236,11 +241,11 @@ QueryTreeNodePtr IQueryTreeNode::clone() const
const auto [node_to_clone, place_for_cloned_node] = nodes_to_clone.back();
nodes_to_clone.pop_back();
auto node_clone = node_to_clone->cloneImpl();
auto it = replacement_map.find(node_to_clone);
auto node_clone = it != replacement_map.end() ? it->second : node_to_clone->cloneImpl();
*place_for_cloned_node = node_clone;
node_clone->setAlias(node_to_clone->alias);
node_clone->setOriginalAST(node_to_clone->original_ast);
node_clone->children = node_to_clone->children;
node_clone->weak_pointers = node_to_clone->weak_pointers;

View File

@ -110,6 +110,13 @@ public:
/// Get a deep copy of the query tree
QueryTreeNodePtr clone() const;
/** Get a deep copy of the query tree.
* If node to clone is key in replacement map, then instead of clone it
* use value node from replacement map.
*/
using ReplacementMap = std::unordered_map<const IQueryTreeNode *, QueryTreeNodePtr>;
QueryTreeNodePtr cloneAndReplace(const ReplacementMap & replacement_map) const;
/// Returns true if node has alias, false otherwise
bool hasAlias() const
{

View File

@ -117,8 +117,9 @@ public:
if (!function_name_if_constant_is_negative.empty() &&
left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal))
{
resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative);
lower_function_name = function_name_if_constant_is_negative;
}
resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[1], lower_function_name);
auto inner_function = aggregate_function_arguments_nodes[0];
auto inner_function_right_argument = std::move(inner_function_arguments_nodes[1]);
@ -133,8 +134,9 @@ public:
if (!function_name_if_constant_is_negative.empty() &&
right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal))
{
resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative);
lower_function_name = function_name_if_constant_is_negative;
}
resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[0], function_name_if_constant_is_negative);
auto inner_function = aggregate_function_arguments_nodes[0];
auto inner_function_left_argument = std::move(inner_function_arguments_nodes[0]);
@ -145,13 +147,13 @@ public:
}
private:
static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const String & aggregate_function_name)
static inline void resolveAggregateFunctionNode(FunctionNode & function_node, QueryTreeNodePtr & argument, const String & aggregate_function_name)
{
auto function_aggregate_function = function_node.getAggregateFunction();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name,
function_aggregate_function->getArgumentTypes(),
{ argument->getResultType() },
function_aggregate_function->getParameters(),
properties);

View File

@ -1695,7 +1695,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, size
subquery_context->setSettings(subquery_settings);
auto options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth, true /*is_subquery*/);
auto interpreter = std::make_unique<InterpreterSelectQueryAnalyzer>(node, options, subquery_context);
auto interpreter = std::make_unique<InterpreterSelectQueryAnalyzer>(node, subquery_context, options);
auto io = interpreter->execute();
@ -2020,11 +2020,14 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(con
StorageID storage_id(database_name, table_name);
storage_id = context->resolveStorageID(storage_id);
auto storage = DatabaseCatalog::instance().getTable(storage_id, context);
auto storage = DatabaseCatalog::instance().tryGetTable(storage_id, context);
if (!storage)
return {};
auto storage_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
auto storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context);
return std::make_shared<TableNode>(std::move(storage), storage_lock, storage_snapshot);
return std::make_shared<TableNode>(std::move(storage), std::move(storage_lock), std::move(storage_snapshot));
}
/// Resolve identifier from compound expression
@ -2867,7 +2870,10 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const
if (resolved_identifier)
{
bool is_cte = resolved_identifier->as<QueryNode>() && resolved_identifier->as<QueryNode>()->isCTE();
auto * subquery_node = resolved_identifier->as<QueryNode>();
auto * union_node = resolved_identifier->as<UnionNode>();
bool is_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE());
/** From parent scopes we can resolve table identifiers only as CTE.
* Example: SELECT (SELECT 1 FROM a) FROM test_table AS a;
@ -4084,8 +4090,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
auto & in_second_argument = function_in_arguments_nodes[1];
auto * table_node = in_second_argument->as<TableNode>();
auto * table_function_node = in_second_argument->as<TableFunctionNode>();
auto * query_node = in_second_argument->as<QueryNode>();
auto * union_node = in_second_argument->as<UnionNode>();
if (table_node && dynamic_cast<StorageSet *>(table_node->getStorage().get()) != nullptr)
{
@ -4118,15 +4122,9 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
in_second_argument = std::move(in_second_argument_query_node);
}
else if (query_node || union_node)
else
{
IdentifierResolveScope subquery_scope(in_second_argument, &scope /*parent_scope*/);
subquery_scope.subquery_depth = scope.subquery_depth + 1;
if (query_node)
resolveQuery(in_second_argument, subquery_scope);
else if (union_node)
resolveUnion(in_second_argument, subquery_scope);
resolveExpressionNode(in_second_argument, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/);
}
}
@ -4714,13 +4712,29 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
{
node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::TABLE_EXPRESSION}, scope).resolved_identifier;
/// If table identifier is resolved as CTE clone it
bool resolved_as_cte = node && node->as<QueryNode>() && node->as<QueryNode>()->isCTE();
/// If table identifier is resolved as CTE clone it and resolve
auto * subquery_node = node->as<QueryNode>();
auto * union_node = node->as<UnionNode>();
bool resolved_as_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE());
if (resolved_as_cte)
{
node = node->clone();
node->as<QueryNode &>().setIsCTE(false);
subquery_node = node->as<QueryNode>();
union_node = node->as<UnionNode>();
if (subquery_node)
subquery_node->setIsCTE(false);
else
union_node->setIsCTE(false);
IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/);
subquery_scope.subquery_depth = scope.subquery_depth + 1;
if (subquery_node)
resolveQuery(node, subquery_scope);
else
resolveUnion(node, subquery_scope);
}
}
@ -4836,6 +4850,9 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/);
subquery_scope.subquery_depth = scope.subquery_depth + 1;
++subquery_counter;
std::string projection_name = "_subquery_" + std::to_string(subquery_counter);
if (node_type == QueryTreeNodeType::QUERY)
resolveQuery(node, subquery_scope);
else
@ -4844,9 +4861,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
if (!allow_table_expression)
evaluateScalarSubqueryIfNeeded(node, subquery_scope.subquery_depth, subquery_scope.context);
++subquery_counter;
if (result_projection_names.empty())
result_projection_names.push_back("_subquery_" + std::to_string(subquery_counter));
result_projection_names.push_back(std::move(projection_name));
break;
}
@ -5193,11 +5209,6 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod
if (resolved_identifier_query_node || resolved_identifier_union_node)
{
if (resolved_identifier_query_node)
resolved_identifier_query_node->setIsCTE(false);
else
resolved_identifier_union_node->setIsCTE(false);
if (table_expression_modifiers.has_value())
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
@ -5434,14 +5445,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
[[fallthrough]];
case QueryTreeNodeType::UNION:
{
IdentifierResolveScope subquery_scope(join_tree_node, &scope);
subquery_scope.subquery_depth = scope.subquery_depth + 1;
if (from_node_type == QueryTreeNodeType::QUERY)
resolveQuery(join_tree_node, subquery_scope);
else if (from_node_type == QueryTreeNodeType::UNION)
resolveUnion(join_tree_node, subquery_scope);
resolveExpressionNode(join_tree_node, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/);
break;
}
case QueryTreeNodeType::TABLE_FUNCTION:

View File

@ -78,9 +78,9 @@ public:
throw Exception(
ErrorCodes::CANNOT_BACKUP_TABLE,
"Intersected parts detected: {} on replica {} and {} on replica {}",
part.info.getPartName(),
part.info.getPartNameForLogs(),
*part.replica_name,
new_part_info.getPartName(),
new_part_info.getPartNameForLogs(),
*replica_name);
}
++last_it;

View File

@ -369,6 +369,10 @@ if (TARGET ch_contrib::crc32_s390x)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32_s390x)
endif()
if (TARGET ch_contrib::crc32-vpmsum)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32-vpmsum)
endif()
dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::abseil_swiss_tables)
@ -611,5 +615,10 @@ if (ENABLE_TESTS)
target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::yaml_cpp)
endif()
if (TARGET ch_contrib::azure_sdk)
target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::azure_sdk)
endif()
add_check(unit_tests_dbms)
endif ()

View File

@ -905,11 +905,51 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
select->where()->children.clear();
select->setExpression(ASTSelectQuery::Expression::WHERE, {});
}
else if (!select->prewhere().get())
{
if (fuzz_rand() % 50 == 0)
{
select->setExpression(ASTSelectQuery::Expression::PREWHERE, select->where()->clone());
if (fuzz_rand() % 2 == 0)
{
select->where()->children.clear();
select->setExpression(ASTSelectQuery::Expression::WHERE, {});
}
}
}
}
else if (fuzz_rand() % 50 == 0)
{
select->setExpression(ASTSelectQuery::Expression::WHERE, getRandomColumnLike());
}
if (select->prewhere().get())
{
if (fuzz_rand() % 50 == 0)
{
select->prewhere()->children.clear();
select->setExpression(ASTSelectQuery::Expression::PREWHERE, {});
}
else if (!select->where().get())
{
if (fuzz_rand() % 50 == 0)
{
select->setExpression(ASTSelectQuery::Expression::WHERE, select->prewhere()->clone());
if (fuzz_rand() % 2 == 0)
{
select->prewhere()->children.clear();
select->setExpression(ASTSelectQuery::Expression::PREWHERE, {});
}
}
}
}
else if (fuzz_rand() % 50 == 0)
{
select->setExpression(ASTSelectQuery::Expression::PREWHERE, getRandomColumnLike());
}
fuzzOrderByList(select->orderBy().get());
fuzz(select->children);

src/Common/CancelToken.cpp

@ -0,0 +1,243 @@
#include <Common/CancelToken.h>
namespace DB
{
namespace ErrorCodes
{
extern const int THREAD_WAS_CANCELED;
}
}
#ifdef OS_LINUX /// Because of futex
#include <base/getThreadId.h>
#include <linux/futex.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <unistd.h>
namespace DB
{
namespace
{
inline Int64 futexWait(void * address, UInt32 value)
{
return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0);
}
inline Int64 futexWake(void * address, int count)
{
return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0);
}
}
void CancelToken::Registry::insert(CancelToken * token)
{
std::lock_guard<std::mutex> lock(mutex);
threads[token->thread_id] = token;
}
void CancelToken::Registry::remove(CancelToken * token)
{
std::lock_guard<std::mutex> lock(mutex);
threads.erase(token->thread_id);
}
void CancelToken::Registry::signal(UInt64 tid)
{
std::lock_guard<std::mutex> lock(mutex);
if (auto it = threads.find(tid); it != threads.end())
it->second->signalImpl();
}
void CancelToken::Registry::signal(UInt64 tid, int code, const String & message)
{
std::lock_guard<std::mutex> lock(mutex);
if (auto it = threads.find(tid); it != threads.end())
it->second->signalImpl(code, message);
}
const std::shared_ptr<CancelToken::Registry> & CancelToken::Registry::instance()
{
static std::shared_ptr<Registry> registry{new Registry()}; // shared_ptr is used to enforce correct destruction order of tokens and registry
return registry;
}
CancelToken::CancelToken()
: state(disabled)
, thread_id(getThreadId())
, registry(Registry::instance())
{
registry->insert(this);
}
CancelToken::~CancelToken()
{
registry->remove(this);
}
void CancelToken::signal(UInt64 tid)
{
Registry::instance()->signal(tid);
}
void CancelToken::signal(UInt64 tid, int code, const String & message)
{
Registry::instance()->signal(tid, code, message);
}
bool CancelToken::wait(UInt32 * address, UInt32 value)
{
chassert((reinterpret_cast<UInt64>(address) & canceled) == 0); // An `address` must be 2-byte aligned
if (value & signaled) // Can happen after spurious wake-up due to cancel of other thread
return true; // Spin-wait unless signal is handled
UInt64 s = state.load();
while (true)
{
if (s & disabled)
{
// Start non-cancelable wait on futex. Spurious wake-up is possible.
futexWait(address, value);
return true; // Disabled - true is forced
}
if (s & canceled)
return false; // Has already been canceled
if (state.compare_exchange_strong(s, reinterpret_cast<UInt64>(address)))
break; // This futex has been "acquired" by this token
}
// Start cancelable wait. Spurious wake-up is possible.
futexWait(address, value);
// "Release" futex and check for cancellation
s = state.load();
while (true)
{
chassert((s & disabled) != disabled); // `disable()` must not be called from another thread
if (s & canceled)
{
if (s == canceled)
break; // Signaled; futex "release" has been done by the signaling thread
else
{
s = state.load();
continue; // To avoid race (may lead to futex destruction) we have to wait for signaling thread to finish
}
}
if (state.compare_exchange_strong(s, 0))
return true; // There was no cancellation; futex "released"
}
// Reset signaled bit
reinterpret_cast<std::atomic<UInt32> *>(address)->fetch_and(~signaled);
return false;
}
void CancelToken::raise()
{
std::unique_lock lock(signal_mutex);
if (exception_code != 0)
throw DB::Exception(
std::exchange(exception_code, 0),
std::exchange(exception_message, {}));
else
throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELED, "Thread was canceled");
}
void CancelToken::notifyOne(UInt32 * address)
{
futexWake(address, 1);
}
void CancelToken::notifyAll(UInt32 * address)
{
futexWake(address, INT_MAX);
}
void CancelToken::signalImpl()
{
signalImpl(0, {});
}
std::mutex CancelToken::signal_mutex;
void CancelToken::signalImpl(int code, const String & message)
{
// Serialize all signaling threads to avoid races due to concurrent signal()/raise() calls
std::unique_lock lock(signal_mutex);
UInt64 s = state.load();
while (true)
{
if (s & canceled)
return; // Already canceled - don't signal twice
if (state.compare_exchange_strong(s, s | canceled))
break; // It is the canceling thread - should deliver signal if necessary
}
exception_code = code;
exception_message = message;
if ((s & disabled) == disabled)
return; // cancellation is disabled - just signal token for later, but don't wake
std::atomic<UInt32> * address = reinterpret_cast<std::atomic<UInt32> *>(s & disabled);
if (address == nullptr)
return; // Thread is currently not waiting on futex - wake-up not required
// Set signaled bit
UInt32 value = address->load();
while (true)
{
if (value & signaled) // Already signaled, just spin-wait until previous signal is handled by waiter
value = address->load();
else if (address->compare_exchange_strong(value, value | signaled))
break;
}
// Wake all threads waiting on `address`, one of them will be canceled and others will get spurious wake-ups
// Woken canceled thread will reset signaled bit
futexWake(address, INT_MAX);
// Signaling thread must remove address from state to notify canceled thread that `futexWake()` is done, thus `wait()` can return.
// Otherwise we may have race condition: signaling thread may try to wake futex that has been already destructed.
state.store(canceled);
}
Cancelable::Cancelable()
{
CancelToken::local().reset();
}
Cancelable::~Cancelable()
{
CancelToken::local().disable();
}
NonCancelable::NonCancelable()
{
CancelToken::local().disable();
}
NonCancelable::~NonCancelable()
{
CancelToken::local().enable();
}
}
#else
namespace DB
{
void CancelToken::raise()
{
throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELED, "Thread was canceled");
}
}
#endif

src/Common/CancelToken.h

@ -0,0 +1,207 @@
#pragma once
#include <base/types.h>
#include <base/defines.h>
#include <Common/Exception.h>
#ifdef OS_LINUX /// Because of futex
#include <atomic>
#include <mutex>
#include <unordered_map>
#include <memory>
namespace DB
{
// Scoped object, enabling thread cancellation (cannot be nested).
// Intended to be used once per cancelable task. It erases any previously held cancellation signal.
// Note that by default thread is not cancelable.
struct Cancelable
{
Cancelable();
~Cancelable();
};
// Scoped object, disabling thread cancellation (cannot be nested; must be inside `Cancelable` region)
struct NonCancelable
{
NonCancelable();
~NonCancelable();
};
// Responsible for synchronization needed to deliver thread cancellation signal.
// Basic building block for cancelable synchronization primitives.
// Allows to perform cancelable wait on memory addresses (think futex)
class CancelToken
{
public:
CancelToken();
CancelToken(const CancelToken &) = delete;
CancelToken(CancelToken &&) = delete;
CancelToken & operator=(const CancelToken &) = delete;
CancelToken & operator=(CancelToken &&) = delete;
~CancelToken();
// Returns token for the current thread
static CancelToken & local()
{
static thread_local CancelToken token;
return token;
}
// Cancelable wait on memory address (futex word).
// Thread will do atomic compare-and-sleep `*address == value`. Waiting will continue until `notifyOne()`
// or `notifyAll()` is called with the same `address`, or the calling thread is canceled using `signal()`.
// Note that spurious wake-ups are also possible due to cancellation of other waiters on the same `address`.
// WARNING: `address` must be 2-byte aligned and `value` highest bit must be zero.
// Return value:
// true - woken by either notify or spurious wakeup;
// false - iff cancellation signal has been received.
// Implementation details:
// It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancellation signal.
// Highest bit of `*address` is used for guaranteed delivery of the signal, but is guaranteed to be zero on return due to cancellation.
// Intended to be called only by thread associated with this token.
bool wait(UInt32 * address, UInt32 value);
// Throws `DB::Exception` received from `signal()`. Call it if `wait()` returned false.
// Intended to be called only by thread associated with this token.
[[noreturn]] void raise();
// Regular wake by address (futex word). It does not interact with token in any way. We have it here to complement `wait()`.
// Can be called from any thread.
static void notifyOne(UInt32 * address);
static void notifyAll(UInt32 * address);
// Send cancel signal to thread with specified `tid`.
// If thread was waiting using `wait()` it will be woken up (unless cancellation is disabled).
// Can be called from any thread.
static void signal(UInt64 tid);
static void signal(UInt64 tid, int code, const String & message);
// Flag used to deliver cancellation into memory address to wake a thread.
// Note that the most significant bit of values at addresses used with `wait()` is reserved.
static constexpr UInt32 signaled = 1u << 31u;
private:
friend struct Cancelable;
friend struct NonCancelable;
// Restores initial state for token to be reused. See `Cancelable` struct.
// Intended to be called only by thread associated with this token.
void reset()
{
state.store(0);
}
// Enable thread cancellation. See `NonCancelable` struct.
// Intended to be called only by thread associated with this token.
void enable()
{
chassert((state.load() & disabled) == disabled);
state.fetch_and(~disabled);
}
// Disable thread cancellation. See `NonCancelable` struct.
// Intended to be called only by thread associated with this token.
void disable()
{
chassert((state.load() & disabled) == 0);
state.fetch_or(disabled);
}
// Singleton. Maps thread IDs to tokens.
struct Registry
{
std::mutex mutex;
std::unordered_map<UInt64, CancelToken*> threads; // By thread ID
void insert(CancelToken * token);
void remove(CancelToken * token);
void signal(UInt64 tid);
void signal(UInt64 tid, int code, const String & message);
static const std::shared_ptr<Registry> & instance();
};
// Cancels this token and wakes thread if necessary.
// Can be called from any thread.
void signalImpl();
void signalImpl(int code, const String & message);
// Lower bit: cancel signal received flag
static constexpr UInt64 canceled = 1;
// Upper bits - possible values:
// 1) all zeros: token is enabled, i.e. wait() call can return false, thread is not waiting on any address;
// 2) all ones: token is disabled, i.e. wait() call cannot be canceled;
// 3) specific `address`: token is enabled and thread is currently waiting on this `address`.
static constexpr UInt64 disabled = ~canceled;
static_assert(sizeof(UInt32 *) == sizeof(UInt64)); // State must be able to hold an address
// All signal handling logic should be globally serialized using this mutex
static std::mutex signal_mutex;
// Cancellation state
alignas(64) std::atomic<UInt64> state;
[[maybe_unused]] char padding[64 - sizeof(state)];
// Cancellation exception
int exception_code;
String exception_message;
// Token is permanently attached to a single thread. There is one-to-one mapping between threads and tokens.
const UInt64 thread_id;
// To avoid `Registry` destruction before last `Token` destruction
const std::shared_ptr<Registry> registry;
};
}
#else
// WARNING: We support cancelable synchronization primitives only on linux for now
namespace DB
{
struct Cancelable
{
Cancelable() = default;
~Cancelable() = default;
};
struct NonCancelable
{
NonCancelable() = default;
~NonCancelable() = default;
};
class CancelToken
{
public:
CancelToken() = default;
CancelToken(const CancelToken &) = delete;
CancelToken(CancelToken &&) = delete;
CancelToken & operator=(const CancelToken &) = delete;
~CancelToken() = default;
static CancelToken & local()
{
static CancelToken token;
return token;
}
bool wait(UInt32 *, UInt32) { return true; }
[[noreturn]] void raise();
static void notifyOne(UInt32 *) {}
static void notifyAll(UInt32 *) {}
static void signal(UInt64) {}
static void signal(UInt64, int, const String &) {}
};
}
#endif
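
A minimal usage sketch of the API above (a sketch only, assuming the Linux implementation; `waiter_tid` and `error_code` are placeholder names, not part of the patch):

#include <Common/CancelToken.h>
#include <atomic>

// Cancelable wait on a futex word until it becomes non-zero.
void waitUntilReady(std::atomic<UInt32> & ready)
{
    DB::Cancelable cancelable;                        // enable cancellation for this task
    UInt32 value = ready.load();
    while (value == 0)
    {
        // Returns false only if a cancellation signal was delivered to this thread
        if (!DB::CancelToken::local().wait(reinterpret_cast<UInt32 *>(&ready), value))
            DB::CancelToken::local().raise();         // rethrows the exception passed to signal()
        value = ready.load();
    }
}

// Producer side: publish the value and wake waiters.
//     ready.store(1);
//     DB::CancelToken::notifyAll(reinterpret_cast<UInt32 *>(&ready));
// Any other thread can abort the waiter by its thread id:
//     DB::CancelToken::signal(waiter_tid, error_code, "shutting down");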

View File

@ -0,0 +1,115 @@
#include <Common/CancelableSharedMutex.h>
#ifdef OS_LINUX /// Because of futex
#include <Common/futex.h>
namespace DB
{
namespace
{
inline bool cancelableWaitUpperFetch(std::atomic<UInt64> & address, UInt64 & value)
{
bool res = CancelToken::local().wait(upperHalfAddress(&address), upperHalf(value));
value = address.load();
return res;
}
inline bool cancelableWaitLowerFetch(std::atomic<UInt64> & address, UInt64 & value)
{
bool res = CancelToken::local().wait(lowerHalfAddress(&address), lowerHalf(value));
value = address.load();
return res;
}
}
CancelableSharedMutex::CancelableSharedMutex()
: state(0)
, waiters(0)
{}
void CancelableSharedMutex::lock()
{
UInt64 value = state.load();
while (true)
{
if (value & writers)
{
waiters++;
if (!cancelableWaitUpperFetch(state, value))
{
waiters--;
CancelToken::local().raise();
}
else
waiters--;
}
else if (state.compare_exchange_strong(value, value | writers))
break;
}
value |= writers;
while (value & readers)
{
if (!cancelableWaitLowerFetch(state, value))
{
state.fetch_and(~writers);
futexWakeUpperAll(state);
CancelToken::local().raise();
}
}
}
bool CancelableSharedMutex::try_lock()
{
UInt64 value = state.load();
return (value & (readers | writers)) == 0 && state.compare_exchange_strong(value, value | writers);
}
void CancelableSharedMutex::unlock()
{
state.fetch_and(~writers);
if (waiters)
futexWakeUpperAll(state);
}
void CancelableSharedMutex::lock_shared()
{
UInt64 value = state.load();
while (true)
{
if (value & writers)
{
waiters++;
if (!cancelableWaitUpperFetch(state, value))
{
waiters--;
CancelToken::local().raise();
}
else
waiters--;
}
else if (state.compare_exchange_strong(value, value + 1)) // overflow is not realistic
break;
}
}
bool CancelableSharedMutex::try_lock_shared()
{
UInt64 value = state.load();
if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) // overflow is not realistic
return true;
return false;
}
void CancelableSharedMutex::unlock_shared()
{
UInt64 value = state.fetch_sub(1) - 1;
if ((value & (writers | readers)) == writers) // If writer is waiting and no more readers
futexWakeLowerOne(state); // Wake writer
}
}
#endif

View File

@ -0,0 +1,64 @@
#pragma once
#include <shared_mutex>
#ifdef OS_LINUX /// Because of futex
#include <Common/CancelToken.h>
#include <base/types.h>
#include <base/defines.h>
#include <atomic>
namespace DB
{
// Reimplementation of `std::shared_mutex` that can interoperate with thread cancellation via `CancelToken::signal()`.
// It has cancellation point on waiting during `lock()` and `shared_lock()`.
// NOTE: It has NO cancellation points on fast code path, when locking does not require waiting.
class TSA_CAPABILITY("CancelableSharedMutex") CancelableSharedMutex
{
public:
CancelableSharedMutex();
~CancelableSharedMutex() = default;
CancelableSharedMutex(const CancelableSharedMutex &) = delete;
CancelableSharedMutex & operator=(const CancelableSharedMutex &) = delete;
// Exclusive ownership
void lock() TSA_ACQUIRE();
bool try_lock() TSA_TRY_ACQUIRE(true);
void unlock() TSA_RELEASE();
// Shared ownership
void lock_shared() TSA_ACQUIRE_SHARED();
bool try_lock_shared() TSA_TRY_ACQUIRE_SHARED(true);
void unlock_shared() TSA_RELEASE_SHARED();
private:
// State 64-bits layout:
// 1b - 31b - 1b - 31b
// signaled - writers - signaled - readers
// 63------------------------------------0
// Two 32-bit words are used for cancelable waiting, so each has its own separate signaled bit
static constexpr UInt64 readers = (1ull << 32ull) - 1ull - CancelToken::signaled;
static constexpr UInt64 readers_signaled = CancelToken::signaled;
static constexpr UInt64 writers = readers << 32ull;
static constexpr UInt64 writers_signaled = readers_signaled << 32ull;
alignas(64) std::atomic<UInt64> state;
std::atomic<UInt32> waiters;
};
}
#else
// WARNING: We support cancelable synchronization primitives only on linux for now
namespace DB
{
using CancelableSharedMutex = std::shared_mutex;
}
#endif
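
A short sketch of how cancellation interacts with this mutex (assumptions: Linux build; `reader_tid` and `error_code` are illustrative names, not part of the patch):

#include <Common/CancelableSharedMutex.h>
#include <Common/CancelToken.h>
#include <shared_mutex>

DB::CancelableSharedMutex shared_state_mutex;

void readerTask()
{
    DB::Cancelable cancelable;                   // make waits inside this task cancelable
    std::shared_lock lock(shared_state_mutex);   // cancellation point only if the lock has to wait
    // ... read shared state ...
}                                                // unlock_shared() on scope exit

// If the reader is blocked behind a writer, another thread can abort it:
//     DB::CancelToken::signal(reader_tid, error_code, "query canceled");
// The blocked lock_shared() then throws DB::Exception via CancelToken::raise().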

View File

@ -645,6 +645,7 @@
M(674, RESOURCE_NOT_FOUND) \
M(675, CANNOT_PARSE_IPV4) \
M(676, CANNOT_PARSE_IPV6) \
M(677, THREAD_WAS_CANCELED) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -48,6 +48,10 @@ inline DB::UInt64 intHash64(DB::UInt64 x)
#include <arm_acle.h>
#endif
#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#include "vec_crc32.h"
#endif
#if defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
#include <crc32-s390x.h>
@ -87,6 +91,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x)
return _mm_crc32_u64(-1ULL, x);
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
return __crc32cd(-1U, x);
#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return crc32_ppc(-1U, reinterpret_cast<const unsigned char *>(&x), sizeof(x));
#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return s390x_crc32(-1U, x);
#else
@ -101,6 +107,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value)
return _mm_crc32_u64(updated_value, x);
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
return __crc32cd(static_cast<UInt32>(updated_value), x);
#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return crc32_ppc(updated_value, reinterpret_cast<const unsigned char *>(&x), sizeof(x));
#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
return s390x_crc32(updated_value, x);
#else

View File

@ -0,0 +1,85 @@
#include <Common/SharedMutex.h>
#ifdef OS_LINUX /// Because of futex
#include <bit>
#include <Common/futex.h>
namespace DB
{
SharedMutex::SharedMutex()
: state(0)
, waiters(0)
{}
void SharedMutex::lock()
{
UInt64 value = state.load();
while (true)
{
if (value & writers)
{
waiters++;
futexWaitUpperFetch(state, value);
waiters--;
}
else if (state.compare_exchange_strong(value, value | writers))
break;
}
value |= writers;
while (value & readers)
futexWaitLowerFetch(state, value);
}
bool SharedMutex::try_lock()
{
UInt64 value = 0;
if (state.compare_exchange_strong(value, writers))
return true;
return false;
}
void SharedMutex::unlock()
{
state.store(0);
if (waiters)
futexWakeUpperAll(state);
}
void SharedMutex::lock_shared()
{
UInt64 value = state.load();
while (true)
{
if (value & writers)
{
waiters++;
futexWaitUpperFetch(state, value);
waiters--;
}
else if (state.compare_exchange_strong(value, value + 1))
break;
}
}
bool SharedMutex::try_lock_shared()
{
UInt64 value = state.load();
if (!(value & writers) && state.compare_exchange_strong(value, value + 1))
return true;
return false;
}
void SharedMutex::unlock_shared()
{
UInt64 value = state.fetch_sub(1) - 1;
if (value == writers)
futexWakeLowerOne(state); // Wake writer
}
}
#endif

src/Common/SharedMutex.h

@ -0,0 +1,52 @@
#pragma once
#include <shared_mutex>
#ifdef OS_LINUX /// Because of futex
#include <base/types.h>
#include <base/defines.h>
#include <atomic>
namespace DB
{
// Faster implementation of `std::shared_mutex` based on a pair of futexes
class TSA_CAPABILITY("SharedMutex") SharedMutex
{
public:
SharedMutex();
~SharedMutex() = default;
SharedMutex(const SharedMutex &) = delete;
SharedMutex & operator=(const SharedMutex &) = delete;
// Exclusive ownership
void lock() TSA_ACQUIRE();
bool try_lock() TSA_TRY_ACQUIRE(true);
void unlock() TSA_RELEASE();
// Shared ownership
void lock_shared() TSA_ACQUIRE_SHARED();
bool try_lock_shared() TSA_TRY_ACQUIRE_SHARED(true);
void unlock_shared() TSA_RELEASE_SHARED();
private:
static constexpr UInt64 readers = (1ull << 32ull) - 1ull; // Lower 32 bits of state
static constexpr UInt64 writers = ~readers; // Upper 32 bits of state
alignas(64) std::atomic<UInt64> state;
std::atomic<UInt32> waiters;
};
}
#else
namespace DB
{
using SharedMutex = std::shared_mutex;
}
#endif
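
Since the class exposes the standard shared-mutex interface, the usual lock adapters work unchanged; a minimal sketch (names are illustrative):

#include <Common/SharedMutex.h>
#include <mutex>
#include <shared_mutex>

DB::SharedMutex cache_mutex;
int cached_value = 0;

int readCached()
{
    std::shared_lock lock(cache_mutex);   // many readers may hold the lock concurrently
    return cached_value;
}

void updateCached(int value)
{
    std::unique_lock lock(cache_mutex);   // writers get exclusive ownership
    cached_value = value;
}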

src/Common/futex.h

@ -0,0 +1,97 @@
#pragma once
#ifdef OS_LINUX
#include <base/types.h>
#include <bit>
#include <linux/futex.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <unistd.h>
namespace DB
{
inline Int64 futexWait(void * address, UInt32 value)
{
return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0);
}
inline Int64 futexWake(void * address, int count)
{
return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0);
}
inline void futexWaitFetch(std::atomic<UInt32> & address, UInt32 & value)
{
futexWait(&address, value);
value = address.load();
}
inline void futexWakeOne(std::atomic<UInt32> & address)
{
futexWake(&address, 1);
}
inline void futexWakeAll(std::atomic<UInt32> & address)
{
futexWake(&address, INT_MAX);
}
inline constexpr UInt32 lowerHalf(UInt64 value)
{
return static_cast<UInt32>(value & 0xffffffffull);
}
inline constexpr UInt32 upperHalf(UInt64 value)
{
return static_cast<UInt32>(value >> 32ull);
}
inline UInt32 * lowerHalfAddress(void * address)
{
return reinterpret_cast<UInt32 *>(address) + (std::endian::native == std::endian::big);
}
inline UInt32 * upperHalfAddress(void * address)
{
return reinterpret_cast<UInt32 *>(address) + (std::endian::native == std::endian::little);
}
inline void futexWaitLowerFetch(std::atomic<UInt64> & address, UInt64 & value)
{
futexWait(lowerHalfAddress(&address), lowerHalf(value));
value = address.load();
}
inline void futexWakeLowerOne(std::atomic<UInt64> & address)
{
futexWake(lowerHalfAddress(&address), 1);
}
inline void futexWakeLowerAll(std::atomic<UInt64> & address)
{
futexWake(lowerHalfAddress(&address), INT_MAX);
}
inline void futexWaitUpperFetch(std::atomic<UInt64> & address, UInt64 & value)
{
futexWait(upperHalfAddress(&address), upperHalf(value));
value = address.load();
}
inline void futexWakeUpperOne(std::atomic<UInt64> & address)
{
futexWake(upperHalfAddress(&address), 1);
}
inline void futexWakeUpperAll(std::atomic<UInt64> & address)
{
futexWake(upperHalfAddress(&address), INT_MAX);
}
}
#endif
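
A self-contained sketch of what the half-word helpers compute (standard C++ only; like the code above, it assumes std::atomic of a 64-bit integer has the same layout as the plain integer):

#include <atomic>
#include <bit>
#include <cassert>
#include <cstdint>

int main()
{
    std::atomic<uint64_t> state{0x0000000200000001ull}; // upper half = 2, lower half = 1

    // Each 32-bit half of the 64-bit word gets its own address, so the two halves
    // can be used as two independent futex words.
    auto * base  = reinterpret_cast<uint32_t *>(&state);
    auto * lower = base + (std::endian::native == std::endian::big);    // lowerHalfAddress
    auto * upper = base + (std::endian::native == std::endian::little); // upperHalfAddress

    assert(*lower == 1);
    assert(*upper == 2);
}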

View File

@ -0,0 +1,371 @@
#include <gtest/gtest.h>
#include <thread>
#include <condition_variable>
#include <shared_mutex>
#include <barrier>
#include <atomic>
#include "Common/Exception.h"
#include <Common/CancelToken.h>
#include <Common/SharedMutex.h>
#include <Common/CancelableSharedMutex.h>
#include <Common/Stopwatch.h>
#include <base/demangle.h>
#include <base/getThreadId.h>
namespace DB
{
namespace ErrorCodes
{
extern const int THREAD_WAS_CANCELED;
}
}
struct NoCancel {};
// for all PerfTests
static constexpr int requests = 512 * 1024;
static constexpr int max_threads = 16;
template <class T, class Status = NoCancel>
void TestSharedMutex()
{
// Test multiple readers can acquire lock
for (int readers = 1; readers <= 128; readers *= 2)
{
T sm;
std::atomic<int> test(0);
std::barrier sync(readers + 1);
std::vector<std::thread> threads;
threads.reserve(readers);
auto reader = [&]
{
[[maybe_unused]] Status status;
std::shared_lock lock(sm);
sync.arrive_and_wait();
test++;
};
for (int i = 0; i < readers; i++)
threads.emplace_back(reader);
{ // writer
[[maybe_unused]] Status status;
sync.arrive_and_wait(); // wait for all readers to acquire the lock to avoid blocking them
std::unique_lock lock(sm);
test++;
}
for (auto & thread : threads)
thread.join();
ASSERT_EQ(test, readers + 1);
}
// Test multiple writers cannot acquire lock simultaneously
for (int writers = 1; writers <= 128; writers *= 2)
{
T sm;
int test = 0;
std::barrier sync(writers);
std::vector<std::thread> threads;
threads.reserve(writers);
auto writer = [&]
{
[[maybe_unused]] Status status;
sync.arrive_and_wait();
std::unique_lock lock(sm);
test++;
};
for (int i = 0; i < writers; i++)
threads.emplace_back(writer);
for (auto & thread : threads)
thread.join();
ASSERT_EQ(test, writers);
}
}
template <class T, class Status = NoCancel>
void TestSharedMutexCancelReader()
{
static constexpr int readers = 8;
static constexpr int tasks_per_reader = 32;
T sm;
std::atomic<int> successes(0);
std::atomic<int> cancels(0);
std::barrier sync(readers + 1);
std::barrier cancel_sync(readers / 2 + 1);
std::vector<std::thread> threads;
std::mutex m;
std::vector<UInt64> tids_to_cancel;
threads.reserve(readers);
auto reader = [&] (int reader_id)
{
if (reader_id % 2 == 0)
{
std::unique_lock lock(m);
tids_to_cancel.emplace_back(getThreadId());
}
for (int task = 0; task < tasks_per_reader; task++) {
try
{
[[maybe_unused]] Status status;
sync.arrive_and_wait(); // (A) sync with writer
sync.arrive_and_wait(); // (B) wait for writer to acquire unique_lock
std::shared_lock lock(sm);
successes++;
}
catch (DB::Exception & e)
{
ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELED);
ASSERT_EQ(e.message(), "test");
cancels++;
cancel_sync.arrive_and_wait(); // (C) sync with writer
}
}
};
for (int reader_id = 0; reader_id < readers; reader_id++)
threads.emplace_back(reader, reader_id);
{ // writer
[[maybe_unused]] Status status;
for (int task = 0; task < tasks_per_reader; task++) {
sync.arrive_and_wait(); // (A) wait for readers to finish previous task
ASSERT_EQ(cancels + successes, task * readers);
ASSERT_EQ(cancels, task * readers / 2);
ASSERT_EQ(successes, task * readers / 2);
std::unique_lock lock(sm);
sync.arrive_and_wait(); // (B) sync with readers
//std::unique_lock lock(m); // not needed, already synced using barrier
for (UInt64 tid : tids_to_cancel)
DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELED, "test");
// This sync is crucial. It is needed to hold `lock` long enough.
// It guarantees that every canceled thread will find `sm` blocked by writer, and thus will begin to wait.
// Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception.
// And this is the desired behaviour.
cancel_sync.arrive_and_wait(); // (C) wait for cancellation to finish, before unlock.
}
}
for (auto & thread : threads)
thread.join();
ASSERT_EQ(successes, tasks_per_reader * readers / 2);
ASSERT_EQ(cancels, tasks_per_reader * readers / 2);
}
template <class T, class Status = NoCancel>
void TestSharedMutexCancelWriter()
{
static constexpr int writers = 8;
static constexpr int tasks_per_writer = 32;
T sm;
std::atomic<int> successes(0);
std::atomic<int> cancels(0);
std::barrier sync(writers);
std::vector<std::thread> threads;
std::mutex m;
std::vector<UInt64> all_tids;
threads.reserve(writers);
auto writer = [&]
{
{
std::unique_lock lock(m);
all_tids.emplace_back(getThreadId());
}
for (int task = 0; task < tasks_per_writer; task++) {
try
{
[[maybe_unused]] Status status;
sync.arrive_and_wait(); // (A) sync all threads before race to acquire the lock
std::unique_lock lock(sm);
successes++;
// Thread that managed to acquire the lock cancels all other waiting writers
//std::unique_lock lock(m); // not needed, already synced using barrier
for (UInt64 tid : all_tids)
{
if (tid != getThreadId())
DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELED, "test");
}
// This sync is crucial. It is needed to hold `lock` long enough.
// It guarantees that every canceled thread will find `sm` blocked, and thus will begin to wait.
// Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception.
// And this is the desired behaviour.
sync.arrive_and_wait(); // (B) wait for cancellation to finish, before unlock.
}
catch (DB::Exception & e)
{
ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELED);
ASSERT_EQ(e.message(), "test");
cancels++;
sync.arrive_and_wait(); // (B) sync with race winner
}
}
};
for (int writer_id = 0; writer_id < writers; writer_id++)
threads.emplace_back(writer);
for (auto & thread : threads)
thread.join();
ASSERT_EQ(successes, tasks_per_writer);
ASSERT_EQ(cancels, tasks_per_writer * (writers - 1));
}
template <class T, class Status = NoCancel>
void PerfTestSharedMutexReadersOnly()
{
std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl;
for (int thrs = 1; thrs <= max_threads; thrs *= 2)
{
T sm;
std::vector<std::thread> threads;
threads.reserve(thrs);
auto reader = [&]
{
[[maybe_unused]] Status status;
for (int request = requests / thrs; request; request--)
{
std::shared_lock lock(sm);
}
};
Stopwatch watch;
for (int i = 0; i < thrs; i++)
threads.emplace_back(reader);
for (auto & thread : threads)
thread.join();
double ns = watch.elapsedNanoseconds();
std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl;
}
}
template <class T, class Status = NoCancel>
void PerfTestSharedMutexWritersOnly()
{
std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl;
for (int thrs = 1; thrs <= max_threads; thrs *= 2)
{
int counter = 0;
T sm;
std::vector<std::thread> threads;
threads.reserve(thrs);
auto writer = [&]
{
[[maybe_unused]] Status status;
for (int request = requests / thrs; request; request--)
{
std::unique_lock lock(sm);
ASSERT_TRUE(counter % 2 == 0);
counter++;
std::atomic_signal_fence(std::memory_order::seq_cst); // force compiler to generate two separate increment instructions
counter++;
}
};
Stopwatch watch;
for (int i = 0; i < thrs; i++)
threads.emplace_back(writer);
for (auto & thread : threads)
thread.join();
ASSERT_EQ(counter, requests * 2);
double ns = watch.elapsedNanoseconds();
std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl;
}
}
template <class T, class Status = NoCancel>
void PerfTestSharedMutexRW()
{
std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl;
for (int thrs = 1; thrs <= max_threads; thrs *= 2)
{
int counter = 0;
T sm;
std::vector<std::thread> threads;
threads.reserve(thrs);
auto reader = [&]
{
[[maybe_unused]] Status status;
for (int request = requests / thrs / 2; request; request--)
{
{
std::shared_lock lock(sm);
ASSERT_TRUE(counter % 2 == 0);
}
{
std::unique_lock lock(sm);
ASSERT_TRUE(counter % 2 == 0);
counter++;
std::atomic_signal_fence(std::memory_order::seq_cst); // force compiler to generate two separate increment instructions
counter++;
}
}
};
Stopwatch watch;
for (int i = 0; i < thrs; i++)
threads.emplace_back(reader);
for (auto & thread : threads)
thread.join();
ASSERT_EQ(counter, requests);
double ns = watch.elapsedNanoseconds();
std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl;
}
}
TEST(Threading, SharedMutexSmokeCancelableEnabled) { TestSharedMutex<DB::CancelableSharedMutex, DB::Cancelable>(); }
TEST(Threading, SharedMutexSmokeCancelableDisabled) { TestSharedMutex<DB::CancelableSharedMutex>(); }
TEST(Threading, SharedMutexSmokeFast) { TestSharedMutex<DB::SharedMutex>(); }
TEST(Threading, SharedMutexSmokeStd) { TestSharedMutex<std::shared_mutex>(); }
TEST(Threading, PerfTestSharedMutexReadersOnlyCancelableEnabled) { PerfTestSharedMutexReadersOnly<DB::CancelableSharedMutex, DB::Cancelable>(); }
TEST(Threading, PerfTestSharedMutexReadersOnlyCancelableDisabled) { PerfTestSharedMutexReadersOnly<DB::CancelableSharedMutex>(); }
TEST(Threading, PerfTestSharedMutexReadersOnlyFast) { PerfTestSharedMutexReadersOnly<DB::SharedMutex>(); }
TEST(Threading, PerfTestSharedMutexReadersOnlyStd) { PerfTestSharedMutexReadersOnly<std::shared_mutex>(); }
TEST(Threading, PerfTestSharedMutexWritersOnlyCancelableEnabled) { PerfTestSharedMutexWritersOnly<DB::CancelableSharedMutex, DB::Cancelable>(); }
TEST(Threading, PerfTestSharedMutexWritersOnlyCancelableDisabled) { PerfTestSharedMutexWritersOnly<DB::CancelableSharedMutex>(); }
TEST(Threading, PerfTestSharedMutexWritersOnlyFast) { PerfTestSharedMutexWritersOnly<DB::SharedMutex>(); }
TEST(Threading, PerfTestSharedMutexWritersOnlyStd) { PerfTestSharedMutexWritersOnly<std::shared_mutex>(); }
TEST(Threading, PerfTestSharedMutexRWCancelableEnabled) { PerfTestSharedMutexRW<DB::CancelableSharedMutex, DB::Cancelable>(); }
TEST(Threading, PerfTestSharedMutexRWCancelableDisabled) { PerfTestSharedMutexRW<DB::CancelableSharedMutex>(); }
TEST(Threading, PerfTestSharedMutexRWFast) { PerfTestSharedMutexRW<DB::SharedMutex>(); }
TEST(Threading, PerfTestSharedMutexRWStd) { PerfTestSharedMutexRW<std::shared_mutex>(); }
#ifdef OS_LINUX /// These tests require cancellability
TEST(Threading, SharedMutexCancelReaderCancelableEnabled) { TestSharedMutexCancelReader<DB::CancelableSharedMutex, DB::Cancelable>(); }
TEST(Threading, SharedMutexCancelWriterCancelableEnabled) { TestSharedMutexCancelWriter<DB::CancelableSharedMutex, DB::Cancelable>(); }
#endif

View File

@ -91,14 +91,20 @@ static ReturnType checkColumnStructure(const ColumnWithTypeAndName & actual, con
expected.dumpStructure()),
code);
if (isColumnConst(*actual.column) && isColumnConst(*expected.column))
if (isColumnConst(*actual.column) && isColumnConst(*expected.column)
&& !actual.column->empty() && !expected.column->empty()) /// don't check values in empty columns
{
Field actual_value = assert_cast<const ColumnConst &>(*actual.column).getField();
Field expected_value = assert_cast<const ColumnConst &>(*expected.column).getField();
if (actual_value != expected_value)
return onError<ReturnType>("Block structure mismatch in " + std::string(context_description) + " stream: different values of constants, actual: "
+ applyVisitor(FieldVisitorToString(), actual_value) + ", expected: " + applyVisitor(FieldVisitorToString(), expected_value),
return onError<ReturnType>(
fmt::format(
"Block structure mismatch in {} stream: different values of constants in column '{}': actual: {}, expected: {}",
context_description,
actual.name,
applyVisitor(FieldVisitorToString(), actual_value),
applyVisitor(FieldVisitorToString(), expected_value)),
code);
}

View File

@ -126,6 +126,10 @@ ExternalTable::ExternalTable(const boost::program_options::variables_map & exter
void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, ReadBuffer & stream)
{
/// After finishing this function we will be ready to receive the next file, for this we clear all the information received.
/// We should use SCOPE_EXIT because read_buffer should be reset correctly if there will be an exception.
SCOPE_EXIT(clear());
const Settings & settings = getContext()->getSettingsRef();
if (settings.http_max_multipart_form_data_size)
@ -167,9 +171,6 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header,
CompletedPipelineExecutor executor(pipeline);
executor.execute();
/// We are ready to receive the next file, for this we clear all the information received
clear();
}
}
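
A small illustration of the SCOPE_EXIT behaviour relied on here (the header path is an assumption; the point is only that the cleanup statement runs even when the body throws):

#include <base/scope_guard.h>
#include <iostream>
#include <stdexcept>

void handleOnePart(bool fail)
{
    SCOPE_EXIT(std::cout << "state cleared\n");   // runs on normal return and when an exception propagates
    if (fail)
        throw std::runtime_error("bad part");
    std::cout << "part processed\n";
}

int main()
{
    handleOnePart(false);
    try { handleOnePart(true); } catch (...) { std::cout << "exception propagated\n"; }
}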

View File

@ -595,6 +595,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \
\
M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \
M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \
M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \
M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \

View File

@ -162,4 +162,9 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS,
IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS,
{{"clickhouse", Dialect::clickhouse},
{"kusto", Dialect::kusto}})
IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS,
{{"mmap", LocalFSReadMethod::mmap},
{"pread", LocalFSReadMethod::pread},
{"read", LocalFSReadMethod::read}})
}

View File

@ -4,6 +4,7 @@
#include <Core/Joins.h>
#include <QueryPipeline/SizeLimits.h>
#include <Formats/FormatSettings.h>
#include <IO/ReadSettings.h>
namespace DB
@ -191,4 +192,6 @@ enum class Dialect
};
DECLARE_SETTING_ENUM(Dialect)
DECLARE_SETTING_ENUM(LocalFSReadMethod)
}

View File

@ -16,7 +16,7 @@ TypeIndexesSet getTypesIndexes(const DataTypes & types)
return type_indexes;
}
void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_complex_types)
void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_complex_types)
{
TypeIndexesSet type_indexes = getTypesIndexes(types);
@ -166,7 +166,7 @@ void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &
void callOnNestedSimpleTypes(DataTypePtr & type, std::function<void(DataTypePtr &)> callback)
{
DataTypes types = {type};
transformTypesRecursively(types, [callback](auto & data_types, const TypeIndexesSet &){ callback(data_types[0]); }, {});
transformTypesRecursively(types, [callback](auto & data_types, TypeIndexesSet &){ callback(data_types[0]); }, {});
}
}

View File

@ -12,7 +12,7 @@ namespace DB
/// If not all types are the same complex type (Array/Map/Tuple), this function won't be called to nested types.
/// Function transform_simple_types will be applied to resulting simple types after all recursive calls.
/// Function transform_complex_types will be applied to complex types (Array/Map/Tuple) after recursive call to their nested types.
void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_complex_types);
void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_complex_types);
void callOnNestedSimpleTypes(DataTypePtr & type, std::function<void(DataTypePtr &)> callback);

View File

@ -52,7 +52,12 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
{
try
{
auto res = std::make_unique<MMapReadBufferFromFileWithCache>(*settings.mmap_cache, filename, 0, file_size.value_or(-1));
std::unique_ptr<MMapReadBufferFromFileWithCache> res;
if (file_size)
res = std::make_unique<MMapReadBufferFromFileWithCache>(*settings.mmap_cache, filename, 0, *file_size);
else
res = std::make_unique<MMapReadBufferFromFileWithCache>(*settings.mmap_cache, filename, 0);
ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap);
return res;
}
@ -63,17 +68,17 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
}
}
auto create = [&](size_t buffer_size, int actual_flags)
auto create = [&](size_t buffer_size, size_t buffer_alignment, int actual_flags)
{
std::unique_ptr<ReadBufferFromFileBase> res;
if (settings.local_fs_method == LocalFSReadMethod::read)
{
res = std::make_unique<ReadBufferFromFile>(filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
res = std::make_unique<ReadBufferFromFile>(filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
}
else if (settings.local_fs_method == LocalFSReadMethod::pread || settings.local_fs_method == LocalFSReadMethod::mmap)
{
res = std::make_unique<ReadBufferFromFilePReadWithDescriptorsCache>(filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
res = std::make_unique<ReadBufferFromFilePReadWithDescriptorsCache>(filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
}
else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async)
{
@ -83,7 +88,7 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER);
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
}
else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool)
{
@ -93,7 +98,7 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER);
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
}
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown read method");
@ -124,11 +129,7 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
auto align_up = [=](size_t value) { return (value + min_alignment - 1) / min_alignment * min_alignment; };
if (alignment == 0)
alignment = min_alignment;
else if (alignment % min_alignment)
alignment = align_up(alignment);
size_t buffer_alignment = alignment == 0 ? min_alignment : align_up(alignment);
size_t buffer_size = settings.local_fs_buffer_size;
if (buffer_size % min_alignment)
@ -145,7 +146,7 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
/// Attempt to open a file with O_DIRECT
try
{
std::unique_ptr<ReadBufferFromFileBase> res = create(buffer_size, flags | O_DIRECT);
std::unique_ptr<ReadBufferFromFileBase> res = create(buffer_size, buffer_alignment, flags | O_DIRECT);
ProfileEvents::increment(ProfileEvents::CreatedReadBufferDirectIO);
return res;
}
@ -166,7 +167,7 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
if (file_size.has_value() && *file_size < buffer_size)
buffer_size = *file_size;
return create(buffer_size, flags);
return create(buffer_size, alignment, flags);
}
}

View File

@ -0,0 +1,25 @@
#include <string>
#include <vector>
#include <Common/logger_useful.h>
#include "config.h"
#if USE_AZURE_BLOB_STORAGE
#include <azure/storage/blobs.hpp>
#include <azure/storage/common/internal/xml_wrapper.hpp>
#include <gtest/gtest.h>
TEST(AzureXMLWrapper, TestLeak)
{
std::string str = "<hello>world</hello>";
Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length());
Azure::Storage::_internal::XmlReader reader2(std::move(reader));
Azure::Storage::_internal::XmlReader reader3 = std::move(reader2);
reader3.Read();
}
#endif

View File

@ -7,49 +7,29 @@
namespace fs = std::filesystem;
template <typename T>
DB::DiskPtr createDisk();
template <>
DB::DiskPtr createDisk<DB::DiskLocal>()
DB::DiskPtr createDisk()
{
fs::create_directory("tmp/");
return std::make_shared<DB::DiskLocal>("local_disk", "tmp/", 0);
}
template <typename T>
void destroyDisk(DB::DiskPtr & disk)
{
disk.reset();
}
template <>
void destroyDisk<DB::DiskLocal>(DB::DiskPtr & disk)
{
disk.reset();
fs::remove_all("tmp/");
}
template <typename T>
class DiskTest : public testing::Test
{
public:
void SetUp() override { disk = createDisk<T>(); }
void TearDown() override { destroyDisk<T>(disk); }
void SetUp() override { disk = createDisk(); }
void TearDown() override { destroyDisk(disk); }
DB::DiskPtr disk;
};
using DiskImplementations = testing::Types<DB::DiskLocal>;
TYPED_TEST_SUITE(DiskTest, DiskImplementations);
TYPED_TEST(DiskTest, createDirectories)
TEST_F(DiskTest, createDirectories)
{
this->disk->createDirectories("test_dir1/");
EXPECT_TRUE(this->disk->isDirectory("test_dir1/"));
@ -59,7 +39,7 @@ TYPED_TEST(DiskTest, createDirectories)
}
TYPED_TEST(DiskTest, writeFile)
TEST_F(DiskTest, writeFile)
{
{
std::unique_ptr<DB::WriteBuffer> out = this->disk->writeFile("test_file");
@ -77,7 +57,7 @@ TYPED_TEST(DiskTest, writeFile)
}
TYPED_TEST(DiskTest, readFile)
TEST_F(DiskTest, readFile)
{
{
std::unique_ptr<DB::WriteBuffer> out = this->disk->writeFile("test_file");
@ -112,7 +92,7 @@ TYPED_TEST(DiskTest, readFile)
}
TYPED_TEST(DiskTest, iterateDirectory)
TEST_F(DiskTest, iterateDirectory)
{
this->disk->createDirectories("test_dir/nested_dir/");

View File

@ -3,14 +3,6 @@
#include <Disks/DiskLocal.h>
#include <Disks/IDisk.h>
template <typename T>
DB::DiskPtr createDisk();
template <>
DB::DiskPtr createDisk<DB::DiskLocal>();
template <typename T>
void destroyDisk(DB::DiskPtr & disk);
template <>
void destroyDisk<DB::DiskLocal>(DB::DiskPtr & disk);

View File

@ -3,7 +3,7 @@
#include <Disks/IDisk.h>
TEST(DiskTest, parentPath)
TEST(DiskPathTest, parentPath)
{
EXPECT_EQ("", DB::parentPath("test_dir/"));
EXPECT_EQ("test_dir/", DB::parentPath("test_dir/nested_dir/"));
@ -11,7 +11,7 @@ TEST(DiskTest, parentPath)
}
TEST(DiskTest, fileName)
TEST(DiskPathTest, fileName)
{
EXPECT_EQ("test_file", DB::fileName("test_file"));
EXPECT_EQ("nested_file", DB::fileName("test_dir/nested_file"));

View File

@ -131,19 +131,21 @@ namespace JSONUtils
{
skipWhitespaceIfAny(in);
assertChar('{', in);
skipWhitespaceIfAny(in);
bool first = true;
NamesAndTypesList names_and_types;
String field;
while (!in.eof() && *in.position() != '}')
{
if (!first)
skipComma(in);
assertChar(',', in);
else
first = false;
auto name = readFieldName(in);
auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info);
names_and_types.emplace_back(name, type);
skipWhitespaceIfAny(in);
}
if (in.eof())
@ -157,17 +159,19 @@ namespace JSONUtils
{
skipWhitespaceIfAny(in);
assertChar('[', in);
skipWhitespaceIfAny(in);
bool first = true;
DataTypes types;
String field;
while (!in.eof() && *in.position() != ']')
{
if (!first)
skipComma(in);
assertChar(',', in);
else
first = false;
auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info);
types.push_back(std::move(type));
skipWhitespaceIfAny(in);
}
if (in.eof())

View File

@ -44,9 +44,16 @@ namespace
return true;
}
void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
type_indexes.clear();
for (const auto & type : data_types)
type_indexes.insert(type->getTypeId());
}
/// If we have both Nothing and non Nothing types, convert all Nothing types to the first non Nothing.
/// For example if we have types [Nothing, String, Nothing] we change it to [String, String, String]
void transformNothingSimpleTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes)
void transformNothingSimpleTypes(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
/// Check if we have both Nothing and non Nothing types.
if (!type_indexes.contains(TypeIndex::Nothing) || type_indexes.size() <= 1)
@ -67,24 +74,48 @@ namespace
if (isNothing(type))
type = not_nothing_type;
}
type_indexes.erase(TypeIndex::Nothing);
}
/// If we have both Int64 and Float64 types, convert all Int64 to Float64.
void transformIntegersAndFloatsToFloats(DataTypes & data_types, const TypeIndexesSet & type_indexes)
/// If we have both Int64 and UInt64, convert all Int64 to UInt64,
/// because UInt64 is inferred only in case of Int64 overflow.
void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::Float64))
if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::UInt64))
return;
for (auto & type : data_types)
{
if (isInteger(type))
if (WhichDataType(type).isInt64())
type = std::make_shared<DataTypeUInt64>();
}
type_indexes.erase(TypeIndex::Int64);
}
/// If we have both Int64 and Float64 types, convert all Int64 to Float64.
void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
bool have_floats = type_indexes.contains(TypeIndex::Float64);
bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64);
if (!have_integers || !have_floats)
return;
for (auto & type : data_types)
{
WhichDataType which(type);
if (which.isInt64() || which.isUInt64())
type = std::make_shared<DataTypeFloat64>();
}
type_indexes.erase(TypeIndex::Int64);
type_indexes.erase(TypeIndex::UInt64);
}
/// If we have only Date and DateTime types, convert Date to DateTime,
/// otherwise, convert all Date and DateTime to String.
void transformDatesAndDateTimes(DataTypes & data_types, const TypeIndexesSet & type_indexes)
void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
bool have_dates = type_indexes.contains(TypeIndex::Date);
bool have_datetimes = type_indexes.contains(TypeIndex::DateTime64);
@ -98,6 +129,8 @@ namespace
type = std::make_shared<DataTypeString>();
}
type_indexes.erase(TypeIndex::Date);
type_indexes.erase(TypeIndex::DateTime);
return;
}
@ -108,16 +141,18 @@ namespace
if (isDate(type))
type = std::make_shared<DataTypeDateTime64>(9);
}
type_indexes.erase(TypeIndex::Date);
}
}
/// If we have numbers (Int64/Float64) and String types and numbers were parsed from String,
/// If we have numbers (Int64/UInt64/Float64) and String types and numbers were parsed from String,
/// convert all numbers to String.
void transformJSONNumbersBackToString(
DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
{
bool have_strings = type_indexes.contains(TypeIndex::String);
bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::Float64);
bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64) || type_indexes.contains(TypeIndex::Float64);
if (!have_strings || !have_numbers)
return;
@ -128,36 +163,43 @@ namespace
|| json_info->numbers_parsed_from_json_strings.contains(type.get())))
type = std::make_shared<DataTypeString>();
}
updateTypeIndexes(data_types, type_indexes);
}
/// If we have both Bool and number (Int64/Float64) types,
/// convert all Bool to Int64/Float64.
void transformBoolsAndNumbersToNumbers(DataTypes & data_types, const TypeIndexesSet & type_indexes)
/// If we have both Bool and number (Int64/UInt64/Float64) types,
/// convert all Bool to Int64/UInt64/Float64.
void transformBoolsAndNumbersToNumbers(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
bool have_floats = type_indexes.contains(TypeIndex::Float64);
bool have_integers = type_indexes.contains(TypeIndex::Int64);
bool have_signed_integers = type_indexes.contains(TypeIndex::Int64);
bool have_unsigned_integers = type_indexes.contains(TypeIndex::UInt64);
bool have_bools = type_indexes.contains(TypeIndex::UInt8);
/// Check if we have both Bool and Integer/Float.
if (!have_bools || (!have_integers && !have_floats))
if (!have_bools || (!have_signed_integers && !have_unsigned_integers && !have_floats))
return;
for (auto & type : data_types)
{
if (isBool(type))
{
if (have_integers)
if (have_signed_integers)
type = std::make_shared<DataTypeInt64>();
else if (have_unsigned_integers)
type = std::make_shared<DataTypeUInt64>();
else
type = std::make_shared<DataTypeFloat64>();
}
}
type_indexes.erase(TypeIndex::UInt8);
}
/// If we have type Nothing/Nullable(Nothing) and some other non Nothing types,
/// convert all Nothing/Nullable(Nothing) types to the first non Nothing.
/// For example, when we have [Nothing, Array(Int64)] it will convert it to [Array(Int64), Array(Int64)]
/// (it can happen when transforming complex nested types like [Array(Nothing), Array(Array(Int64))])
void transformNothingComplexTypes(DataTypes & data_types)
void transformNothingComplexTypes(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
bool have_nothing = false;
DataTypePtr not_nothing_type = nullptr;
@ -177,10 +219,12 @@ namespace
if (isNothing(removeNullable(type)))
type = not_nothing_type;
}
updateTypeIndexes(data_types, type_indexes);
}
/// If we have both Nullable and non Nullable types, make all types Nullable
void transformNullableTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes)
void transformNullableTypes(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
if (!type_indexes.contains(TypeIndex::Nullable))
return;
@ -190,6 +234,8 @@ namespace
if (type->canBeInsideNullable())
type = makeNullable(type);
}
updateTypeIndexes(data_types, type_indexes);
}
/// If we have Tuple with the same nested types like Tuple(Int64, Int64),
@ -197,11 +243,12 @@ namespace
/// For example when we had type Tuple(Int64, Nullable(Nothing)) and we
/// transformed it to Tuple(Nullable(Int64), Nullable(Int64)) we will
/// also transform it to Array(Nullable(Int64))
void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, const TypeIndexesSet & type_indexes)
void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
if (!type_indexes.contains(TypeIndex::Tuple))
return;
bool remove_tuple_index = true;
for (auto & type : data_types)
{
if (isTuple(type))
@ -209,8 +256,13 @@ namespace
const auto * tuple_type = assert_cast<const DataTypeTuple *>(type.get());
if (checkIfTypesAreEqual(tuple_type->getElements()))
type = std::make_shared<DataTypeArray>(tuple_type->getElements().back());
else
remove_tuple_index = false;
}
}
if (remove_tuple_index)
type_indexes.erase(TypeIndex::Tuple);
}
template <bool is_json>
@ -221,7 +273,7 @@ namespace
/// For example, if we have [Tuple(Nullable(Nothing), String), Array(Date), Tuple(Date, String)]
/// it will convert them all to Array(String)
void transformJSONTuplesAndArraysToArrays(
DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
{
if (!type_indexes.contains(TypeIndex::Tuple))
return;
@ -266,12 +318,14 @@ namespace
if (isArray(type) || isTuple(type))
type = std::make_shared<DataTypeArray>(nested_types.back());
}
type_indexes.erase(TypeIndex::Tuple);
}
}
/// If we have Map and Object(JSON) types, convert all Map types to Object(JSON).
/// If we have Map types with different value types, convert all Map types to Object(JSON)
void transformMapsAndObjectsToObjects(DataTypes & data_types, const TypeIndexesSet & type_indexes)
void transformMapsAndObjectsToObjects(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
if (!type_indexes.contains(TypeIndex::Map))
return;
@ -298,9 +352,11 @@ namespace
if (isMap(type))
type = std::make_shared<DataTypeObject>("json", true);
}
type_indexes.erase(TypeIndex::Map);
}
void transformMapsObjectsAndStringsToStrings(DataTypes & data_types, const TypeIndexesSet & type_indexes)
void transformMapsObjectsAndStringsToStrings(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
bool have_maps = type_indexes.contains(TypeIndex::Map);
bool have_objects = type_indexes.contains(TypeIndex::Object);
@ -315,19 +371,26 @@ namespace
if (isMap(type) || isObject(type))
type = std::make_shared<DataTypeString>();
}
type_indexes.erase(TypeIndex::Map);
type_indexes.erase(TypeIndex::Object);
}
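A condensed, illustrative sketch of the String fallback above, using string labels instead of real types; the exact guard condition here is an assumption, since the hunk is shown only partially. The idea: when Map or Object(JSON) values occur together with Strings, everything degrades to String and the Map/Object markers are erased from the index set.

#include <set>
#include <string>
#include <vector>

void mapsObjectsAndStringsToStrings(std::vector<std::string> & types, std::set<std::string> & type_indexes)
{
    const bool have_maps = type_indexes.count("Map") > 0;
    const bool have_objects = type_indexes.count("Object") > 0;
    const bool have_strings = type_indexes.count("String") > 0;

    /// Assumed guard: only act when strings are mixed with maps and/or objects.
    if (!have_strings || (!have_maps && !have_objects))
        return;

    for (auto & t : types)
        if (t == "Map" || t == "Object")
            t = "String";

    type_indexes.erase("Map");      /// Keep the index set in sync, as the hunk above now does.
    type_indexes.erase("Object");
}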
template <bool is_json>
void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
auto transform_simple_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes)
auto transform_simple_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes)
{
/// Remove all Nothing types if possible.
transformNothingSimpleTypes(data_types, type_indexes);
/// Transform integers to floats if needed.
if (settings.try_infer_integers)
{
/// Transform Int64 to UInt64 if needed.
transformIntegers(data_types, type_indexes);
/// Transform integers to floats if needed.
transformIntegersAndFloatsToFloats(data_types, type_indexes);
}
/// Transform Date to DateTime or both to String if needed.
if (settings.try_infer_dates || settings.try_infer_datetimes)
@ -347,14 +410,14 @@ namespace
transformBoolsAndNumbersToNumbers(data_types, type_indexes);
};
auto transform_complex_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes)
auto transform_complex_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes)
{
/// Make types Nullable if needed.
transformNullableTypes(data_types, type_indexes);
/// If we have type Nothing, it means that we had empty Array/Map while inference.
/// If there is at least one non Nothing type, change all Nothing types to it.
transformNothingComplexTypes(data_types);
transformNothingComplexTypes(data_types, type_indexes);
if constexpr (!is_json)
return;
@ -569,12 +632,30 @@ namespace
return read_int ? std::make_shared<DataTypeInt64>() : nullptr;
char * int_end = buf.position();
/// We cam safely get back to the start of the number, because we read from a string and we didn't reach eof.
/// We can safely get back to the start of the number, because we read from a string and we didn't reach eof.
buf.position() = number_start;
bool read_uint = false;
char * uint_end = nullptr;
/// In case of Int64 overflow we can try to infer UInt64.
if (!read_int)
{
UInt64 tmp_uint;
read_uint = tryReadIntText(tmp_uint, buf);
/// If we reached eof, it cannot be float (it requires no less data than integer)
if (buf.eof())
return read_uint ? std::make_shared<DataTypeUInt64>() : nullptr;
uint_end = buf.position();
buf.position() = number_start;
}
if (tryReadFloatText(tmp_float, buf))
{
if (read_int && buf.position() == int_end)
return std::make_shared<DataTypeInt64>();
if (read_uint && buf.position() == uint_end)
return std::make_shared<DataTypeUInt64>();
return std::make_shared<DataTypeFloat64>();
}
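The control flow above can be illustrated without ClickHouse's ReadBuffer: parse the text as a signed integer, retry as unsigned on overflow, then parse it as a double; if the double parse consumed exactly the bytes the integer parse did, the value is an integer, otherwise it is a float. A rough standalone equivalent built on strtoll/strtoull/strtod (illustrative only, not the real implementation, and without the eof shortcuts used above):

#include <cassert>
#include <cerrno>
#include <cstdlib>
#include <string>

enum class NumberKind { Int64, UInt64, Float64, None };

/// Decide which numeric type a textual field should be inferred as.
NumberKind inferNumberKind(const std::string & s)
{
    const char * start = s.c_str();

    char * int_end = nullptr;
    errno = 0;
    (void)std::strtoll(start, &int_end, 10);
    const bool read_int = int_end != start && errno != ERANGE;

    char * uint_end = nullptr;
    bool read_uint = false;
    if (!read_int)                        /// Int64 overflow (or no digits): retry as UInt64.
    {
        errno = 0;
        (void)std::strtoull(start, &uint_end, 10);
        read_uint = uint_end != start && errno != ERANGE;
    }

    char * float_end = nullptr;
    (void)std::strtod(start, &float_end);
    if (float_end == start)
        return NumberKind::None;          /// Not a number at all.
    if (read_int && float_end == int_end)
        return NumberKind::Int64;         /// Float parsing stopped where integer parsing did.
    if (read_uint && float_end == uint_end)
        return NumberKind::UInt64;
    return NumberKind::Float64;           /// Extra characters such as '.', 'e', or an overflow.
}

int main()
{
    assert(inferNumberKind("42") == NumberKind::Int64);
    assert(inferNumberKind("18446744073709551615") == NumberKind::UInt64);  /// Overflows Int64.
    assert(inferNumberKind("1.5e3") == NumberKind::Float64);
}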
@ -590,6 +671,19 @@ namespace
bool read_int = tryReadIntText(tmp_int, peekable_buf);
auto * int_end = peekable_buf.position();
peekable_buf.rollbackToCheckpoint(true);
bool read_uint = false;
char * uint_end = nullptr;
/// In case of Int64 overflow we can try to infer UInt64.
if (!read_int)
{
PeekableReadBufferCheckpoint new_checkpoint(peekable_buf);
UInt64 tmp_uint;
read_uint = tryReadIntText(tmp_uint, peekable_buf);
uint_end = peekable_buf.position();
peekable_buf.rollbackToCheckpoint(true);
}
if (tryReadFloatText(tmp_float, peekable_buf))
{
/// Float parsing reads no fewer bytes than integer parsing,
@ -597,6 +691,8 @@ namespace
/// If it's the same, then it's integer.
if (read_int && peekable_buf.position() == int_end)
return std::make_shared<DataTypeInt64>();
if (read_uint && peekable_buf.position() == uint_end)
return std::make_shared<DataTypeUInt64>();
return std::make_shared<DataTypeFloat64>();
}
}
@ -874,6 +970,11 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting
Int64 tmp_int;
if (tryReadIntText(tmp_int, buf) && buf.eof())
return std::make_shared<DataTypeInt64>();
/// In case of Int64 overflow, try to infer UInt64
UInt64 tmp_uint;
if (tryReadIntText(tmp_uint, buf) && buf.eof())
return std::make_shared<DataTypeUInt64>();
}
/// We can safely get back to the start of the buffer, because we read from a string and we didn't reach eof.
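As a side note on why the UInt64 fallback is preferable to letting such values fall through to Float64: distinct values above the Int64 range can collapse to the same double. A tiny standalone check (not ClickHouse code):

#include <cassert>
#include <cstdint>

int main()
{
    const uint64_t a = 18446744073709551615ULL;  /// 2^64 - 1, the UInt64 maximum.
    const uint64_t b = 18446744073709551614ULL;  /// 2^64 - 2.
    /// Both round to 2^64 as doubles, so inferring Float64 here would conflate them,
    /// while inferring UInt64 keeps them distinct and exact.
    assert(static_cast<double>(a) == static_cast<double>(b));
    assert(a != b);
}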

View File

@ -86,6 +86,10 @@ if (TARGET ch_contrib::rapidjson)
list (APPEND PRIVATE_LIBS ch_contrib::rapidjson)
endif()
if (TARGET ch_contrib::crc32-vpmsum)
list (APPEND PUBLIC_LIBS ch_contrib::crc32-vpmsum)
endif()
add_subdirectory(GatherUtils)
list (APPEND PRIVATE_LIBS clickhouse_functions_gatherutils)

View File

@ -14,6 +14,10 @@
#include <city.h>
#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#include "vec_crc32.h"
#endif
namespace DB
{
@ -36,6 +40,8 @@ struct Hash
return _mm_crc32_u64(crc, val);
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
return __crc32cd(static_cast<UInt32>(crc), val);
#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val));
#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
return s390x_crc32(crc, val);
#else
@ -49,6 +55,8 @@ struct Hash
return _mm_crc32_u32(crc, val);
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
return __crc32cw(crc, val);
#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val));
#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
return s390x_crc32_u32(crc, val);
#else
@ -62,6 +70,8 @@ struct Hash
return _mm_crc32_u16(crc, val);
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
return __crc32ch(crc, val);
#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val));
#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
return s390x_crc32_u16(crc, val);
#else
@ -75,6 +85,8 @@ struct Hash
return _mm_crc32_u8(crc, val);
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
return __crc32cb(crc, val);
#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val));
#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
return s390x_crc32_u8(crc, val);
#else
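The pattern these hunks extend can be summarised in a hedged standalone sketch: one CRC32C update routine per word size, chosen at compile time by architecture, with the new branch delegating to crc32_ppc from the vendored crc32-vpmsum library on little-endian POWER. The sketch below keeps only the x86 and AArch64 hardware paths and uses a plain bitwise CRC-32C as the portable fallback; it is illustrative, not the real Hash struct.

#include <cstddef>
#include <cstdint>
#include <cstdio>

#if defined(__SSE4_2__)
#    include <nmmintrin.h>
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
#    include <arm_acle.h>
#endif

/// Update a CRC-32C value with one 64-bit word, using a hardware instruction when available.
inline uint64_t crc32cWord(uint64_t crc, uint64_t val)
{
#if defined(__SSE4_2__)
    return _mm_crc32_u64(crc, val);                     /// x86: SSE4.2 CRC32 instruction.
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    return __crc32cd(static_cast<uint32_t>(crc), val);  /// AArch64: CRC32 extension.
#else
    /// Portable bit-by-bit CRC-32C (Castagnoli, reflected polynomial 0x82F63B78) over the
    /// bytes of `val` in memory order; matches the hardware paths on little-endian machines.
    auto c = static_cast<uint32_t>(crc);
    const auto * bytes = reinterpret_cast<const unsigned char *>(&val);
    for (size_t i = 0; i < sizeof(val); ++i)
    {
        c ^= bytes[i];
        for (int bit = 0; bit < 8; ++bit)
            c = (c >> 1) ^ (0x82F63B78u & (0u - (c & 1u)));
    }
    return c;
#endif
}

int main()
{
    std::printf("%08x\n", static_cast<unsigned>(crc32cWord(0, 0x0123456789ABCDEFULL)));
}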

View File

@ -24,6 +24,10 @@
# include <arm_acle.h>
#endif
#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#include "vec_crc32.h"
#endif
namespace DB
{
/** Distance function implementation.
@ -70,6 +74,8 @@ struct NgramDistanceImpl
return _mm_crc32_u64(code_points[2], combined) & 0xFFFFu;
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
return __crc32cd(code_points[2], combined) & 0xFFFFu;
#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return crc32_ppc(code_points[2], reinterpret_cast<const unsigned char *>(&combined), sizeof(combined)) & 0xFFFFu;
#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return s390x_crc32(code_points[2], combined) & 0xFFFFu;
#else

View File

@ -18,6 +18,7 @@ void MMapReadBufferFromFileWithCache::init()
size_t page_size = static_cast<size_t>(::getPageSize());
ReadBuffer::padded = (length % page_size) > 0 && (length % page_size) <= (page_size - (PADDING_FOR_SIMD - 1));
ReadBufferFromFileBase::file_size = length;
}

View File

@ -1,4 +1,5 @@
#include <IO/ReadBufferFromFileBase.h>
#include <Interpreters/Context.h>
namespace DB
{
@ -31,4 +32,17 @@ size_t ReadBufferFromFileBase::getFileSize()
throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for read buffer");
}
void ReadBufferFromFileBase::setProgressCallback(ContextPtr context)
{
auto file_progress_callback = context->getFileProgressCallback();
if (!file_progress_callback)
return;
setProfileCallback([file_progress_callback](const ProfileInfo & progress)
{
file_progress_callback(FileProgress(progress.bytes_read, 0));
});
}
}
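In effect this hunk hoists the adapter from the descriptor-specific reader (removed further below) into the common base class, so every ReadBufferFromFileBase descendant can report file progress. A hedged sketch of the pattern with illustrative names, not the real ClickHouse interfaces:

#include <cassert>
#include <cstddef>
#include <functional>

struct ReadStats { size_t bytes_read = 0; };

class FileReaderBase
{
public:
    using ProfileCallback = std::function<void(const ReadStats &)>;

    void setProfileCallback(ProfileCallback callback) { profile_callback = std::move(callback); }

    /// Adapt an external "bytes read" progress callback into the per-read profiling hook.
    void setProgressCallback(std::function<void(size_t)> progress)
    {
        if (!progress)
            return;
        setProfileCallback([progress](const ReadStats & stats) { progress(stats.bytes_read); });
    }

    /// Demo only: pretend some bytes were read and fire the profiling hook.
    void simulateRead(size_t bytes)
    {
        ReadStats stats;
        stats.bytes_read = bytes;
        if (profile_callback)
            profile_callback(stats);
    }

protected:
    ProfileCallback profile_callback;
};

int main()
{
    FileReaderBase reader;
    size_t total = 0;
    reader.setProgressCallback([&](size_t bytes) { total += bytes; });
    reader.simulateRead(4096);
    reader.simulateRead(1024);
    assert(total == 5120);   /// Every read is reported through the adapted callback.
}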

View File

@ -3,6 +3,7 @@
#include <IO/BufferWithOwnMemory.h>
#include <IO/SeekableReadBuffer.h>
#include <IO/WithFileName.h>
#include <Interpreters/Context_fwd.h>
#include <base/time.h>
#include <functional>
@ -51,6 +52,8 @@ public:
size_t getFileSize() override;
void setProgressCallback(ContextPtr context);
protected:
std::optional<size_t> file_size;
ProfileCallback profile_callback;

View File

@ -7,7 +7,6 @@
#include <Common/CurrentMetrics.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteHelpers.h>
#include <IO/Progress.h>
#include <Common/filesystemHelpers.h>
#include <sys/stat.h>
#include <Interpreters/Context.h>
@ -254,18 +253,4 @@ size_t ReadBufferFromFileDescriptor::getFileSize()
return getSizeFromFileDescriptor(fd, getFileName());
}
void ReadBufferFromFileDescriptor::setProgressCallback(ContextPtr context)
{
auto file_progress_callback = context->getFileProgressCallback();
if (!file_progress_callback)
return;
setProfileCallback([file_progress_callback](const ProfileInfo & progress)
{
file_progress_callback(FileProgress(progress.bytes_read, 0));
});
}
}

Some files were not shown because too many files have changed in this diff.