Merge branch 'master' into optimize_uniq_to_count2

2024-11-22 07:31:57 +00:00 · 2023-09-19 10:29:03 +08:00 · 2023-09-19 10:29:03 +08:00 · 231d16040b
commit 231d16040b
parent 70a262a775 9e1e8f4d67
374 changed files with 7764 additions and 2119 deletions
--- a/.github/workflows/auto_release.yml
+++ b/.github/workflows/auto_release.yml
@ -0,0 +1,45 @@
+# Workflow that runs tests/ci/auto_release.py on a self-hosted runner.
+name: AutoRelease
+
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
+
+# Every run of this workflow uses the same concurrency group name.
+concurrency:
+  group: auto-release
+on: # yamllint disable-line rule:truthy
+  # The cron schedule is commented out, so workflow_dispatch is the only active trigger.
+  # schedule:
+  #   - cron: '0 10-16 * * 1-5'
+  workflow_dispatch:
+
+jobs:
+  # NOTE(review): the job id and the TEMP_PATH directory ("cherry_pick") are named after
+  # cherry-picking although this workflow runs the auto-release script; presumably carried
+  # over from another workflow - confirm before renaming, the job id may be referenced elsewhere.
+  CherryPick:
+    runs-on: [self-hosted, style-checker-aarch64]
+    steps:
+      - name: Set envs
+        # Appends TEMP_PATH, the robot SSH key (a multiline value delimited by RCSK) and the
+        # REPO_* variables to the file named by $GITHUB_ENV; the heredoc delimiter is quoted,
+        # so the shell itself performs no expansion on these lines.
+        # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/cherry_pick
+          ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
+          ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
+          RCSK
+          REPO_OWNER=ClickHouse
+          REPO_NAME=ClickHouse
+          REPO_TEAM=core
+          EOF
+      # Checkout uses the robot commit token; fetch-depth: 0 presumably requests the full
+      # history as in actions/checkout - confirm against ClickHouse/checkout.
+      - name: Check out repository code
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
+          fetch-depth: 0
+      # Runs the release script from tests/ci with --release-after-days=3.
+      - name: Auto-release
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 auto_release.py --release-after-days=3
+      # Runs even when an earlier step failed (if: always()). The trailing "||:" keeps a
+      # failing docker kill/rm pipeline from failing the step; TEMP_PATH is then removed.
+      - name: Cleanup
+        if: always()
+        run: |
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
--- a/contrib/abseil-cpp
+++ b/contrib/abseil-cpp
@ -1 +1 @@
-Subproject commit 215105818dfde3174fe799600bb0f3cae233d0bf
+Subproject commit 5655528c41830f733160de4fb0b99073841bae9e
--- a/contrib/abseil-cpp-cmake/CMakeLists.txt
+++ b/contrib/abseil-cpp-cmake/CMakeLists.txt
@ -1,5 +1,5 @@
 set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
-set(BUILD_TESTING OFF)
+
 set(ABSL_PROPAGATE_CXX_STD ON)
 add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp")

--- a/contrib/re2-cmake/CMakeLists.txt
+++ b/contrib/re2-cmake/CMakeLists.txt
@ -1,14 +1,3 @@
-# Copyright 2015 The RE2 Authors.  All Rights Reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-# This file was edited for ClickHouse
-
-string(FIND ${CMAKE_CURRENT_BINARY_DIR} " " _have_space)
-if(_have_space GREATER 0)
-    message(FATAL_ERROR "Using spaces in build path [${CMAKE_CURRENT_BINARY_DIR}] highly not recommended. Library re2st will be disabled.")
-endif()
-
 set(SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/re2")

 set(RE2_SOURCES
@ -35,33 +24,9 @@ set(RE2_SOURCES
    ${SRC_DIR}/util/rune.cc
    ${SRC_DIR}/util/strutil.cc
 )
-add_library(re2 ${RE2_SOURCES})
-target_include_directories(re2 PUBLIC "${SRC_DIR}")
-target_link_libraries(re2 ch_contrib::abseil_str_format)

-# Building re2 which is thread-safe and re2_st which is not.
-# re2 changes its state during matching of regular expression, e.g. creates temporary DFA.
-# It uses RWLock to process the same regular expression object from different threads.
-# In order to avoid redundant locks in some cases, we use not thread-safe version of the library (re2_st).
+# Single re2 library target built from the source list above; consumers get the
+# re2 source directory on their include path.
+add_library(_re2 ${RE2_SOURCES})
+target_include_directories(_re2 PUBLIC "${SRC_DIR}")
+# Explicit PUBLIC keeps the transitive propagation that the keyword-less signature had,
+# while avoiding the legacy form of target_link_libraries.
+target_link_libraries(_re2 PUBLIC ch_contrib::abseil_str_format)

-add_library(re2_st ${RE2_SOURCES})
-target_compile_definitions (re2_st PRIVATE NDEBUG NO_THREADS re2=re2_st)
-target_include_directories (re2_st PRIVATE .)
-target_include_directories (re2_st SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
-target_include_directories (re2_st SYSTEM BEFORE PUBLIC ${SRC_DIR})
-target_link_libraries (re2_st ch_contrib::abseil_str_format)
-
-file (MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/re2_st)
-foreach (FILENAME filtered_re2.h re2.h set.h stringpiece.h)
-    add_custom_command (OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}"
-        COMMAND ${CMAKE_COMMAND} -DSOURCE_FILENAME="${SRC_DIR}/re2/${FILENAME}"
-            -DTARGET_FILENAME="${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}"
-            -P "${CMAKE_CURRENT_SOURCE_DIR}/re2_transform.cmake"
-        COMMENT "Creating ${FILENAME} for re2_st library.")
-    add_custom_target (transform_${FILENAME} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}")
-    add_dependencies (re2_st transform_${FILENAME})
-endforeach ()
-
-# NOTE: you should not change name of library here, since it is used to generate required header (see above)
-add_library(ch_contrib::re2 ALIAS re2)
-add_library(ch_contrib::re2_st ALIAS re2_st)
+add_library(ch_contrib::re2 ALIAS _re2)
--- a/contrib/re2-cmake/re2_transform.cmake
+++ b/contrib/re2-cmake/re2_transform.cmake
@ -1,10 +0,0 @@
-file (READ ${SOURCE_FILENAME} CONTENT)
-string (REGEX REPLACE "using re2::RE2;" "" CONTENT "${CONTENT}")
-string (REGEX REPLACE "using re2::LazyRE2;" "" CONTENT "${CONTENT}")
-string (REGEX REPLACE "namespace re2 {" "namespace re2_st {" CONTENT "${CONTENT}")
-string (REGEX REPLACE "re2::" "re2_st::" CONTENT "${CONTENT}")
-string (REGEX REPLACE "\"re2/" "\"re2_st/" CONTENT "${CONTENT}")
-string (REGEX REPLACE "(.\\*?_H)" "\\1_ST" CONTENT "${CONTENT}")
-string (REGEX REPLACE "#define MUTEX_IS_PTHREAD_RWLOCK" "#undef MUTEX_IS_PTHREAD_RWLOCK" CONTENT "${CONTENT}")
-string (REGEX REPLACE "typedef std::mutex MutexType;" "struct MutexType { void lock() {} void unlock() {} };" CONTENT "${CONTENT}")
-file (WRITE ${TARGET_FILENAME} "${CONTENT}")
--- a/contrib/s2geometry
+++ b/contrib/s2geometry
@ -1 +1 @@
-Subproject commit 4a7ebd5da04cb6c9ea38bbf5914a9f8f3c768564
+Subproject commit 0547c38371777a1c1c8be263a6f05c3bf71bb05b
--- a/contrib/s2geometry-cmake/CMakeLists.txt
+++ b/contrib/s2geometry-cmake/CMakeLists.txt
@ -7,12 +7,6 @@ endif()

 set(S2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src")

-set(ABSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
-if(NOT EXISTS "${ABSL_SOURCE_DIR}/CMakeLists.txt")
-    message(FATAL_ERROR " submodule contrib/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive")
-endif()
-
-
 set(S2_SRCS
    "${S2_SOURCE_DIR}/s2/encoded_s2cell_id_vector.cc"
    "${S2_SOURCE_DIR}/s2/encoded_s2point_vector.cc"
@ -58,7 +52,9 @@ set(S2_SRCS
    "${S2_SOURCE_DIR}/s2/s2edge_crossings.cc"
    "${S2_SOURCE_DIR}/s2/s2edge_distances.cc"
    "${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc"
+    "${S2_SOURCE_DIR}/s2/s2error.cc"
    "${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc"
+    "${S2_SOURCE_DIR}/s2/s2hausdorff_distance_query.cc"
    "${S2_SOURCE_DIR}/s2/s2latlng.cc"
    "${S2_SOURCE_DIR}/s2/s2latlng_rect.cc"
    "${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc"
@ -93,59 +89,58 @@ set(S2_SRCS
    "${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc"
    "${S2_SOURCE_DIR}/s2/s2shape_index_measures.cc"
    "${S2_SOURCE_DIR}/s2/s2shape_measures.cc"
+    "${S2_SOURCE_DIR}/s2/s2shape_nesting_query.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_build_polygon_boundaries.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc"
-    "${S2_SOURCE_DIR}/s2/s2shapeutil_range_iterator.cc"
    "${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc"
    "${S2_SOURCE_DIR}/s2/s2text_format.cc"
    "${S2_SOURCE_DIR}/s2/s2wedge_relations.cc"
    "${S2_SOURCE_DIR}/s2/s2winding_operation.cc"
-    "${S2_SOURCE_DIR}/s2/strings/serialize.cc"
    "${S2_SOURCE_DIR}/s2/util/bits/bit-interleave.cc"
-    "${S2_SOURCE_DIR}/s2/util/bits/bits.cc"
    "${S2_SOURCE_DIR}/s2/util/coding/coder.cc"
    "${S2_SOURCE_DIR}/s2/util/coding/varint.cc"
    "${S2_SOURCE_DIR}/s2/util/math/exactfloat/exactfloat.cc"
    "${S2_SOURCE_DIR}/s2/util/math/mathutil.cc"
    "${S2_SOURCE_DIR}/s2/util/units/length-units.cc"
-
 )

 add_library(_s2 ${S2_SRCS})
 add_library(ch_contrib::s2 ALIAS _s2)

-set_property(TARGET _s2 PROPERTY CXX_STANDARD 17)
-
 if (TARGET OpenSSL::SSL)
    target_link_libraries(_s2 PRIVATE OpenSSL::Crypto OpenSSL::SSL)
 endif()

 # Copied from contrib/s2geometry/CMakeLists
 target_link_libraries(_s2 PRIVATE
-        absl::base
-        absl::btree
-        absl::config
-        absl::core_headers
-        absl::dynamic_annotations
-        absl::endian
-        absl::fixed_array
-        absl::flat_hash_map
-        absl::flat_hash_set
-        absl::hash
-        absl::inlined_vector
-        absl::int128
-        absl::log_severity
-        absl::memory
-        absl::span
-        absl::str_format
-        absl::strings
-        absl::type_traits
-        absl::utility
-        )
+    absl::base
+    absl::btree
+    absl::check
+    absl::config
+    absl::core_headers
+    absl::dynamic_annotations
+    absl::endian
+    absl::fixed_array
+    absl::flags
+    absl::flat_hash_map
+    absl::flat_hash_set
+    absl::hash
+    absl::inlined_vector
+    absl::int128
+    absl::log
+    absl::log_severity
+    absl::memory
+    absl::span
+    absl::status
+    absl::str_format
+    absl::strings
+    absl::type_traits
+    absl::utility
+)

 target_include_directories(_s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/")
-target_include_directories(_s2 SYSTEM PUBLIC "${ABSL_SOURCE_DIR}")
+# The set(ABSL_SOURCE_DIR ...) that used to live in this file was removed, so the variable
+# is no longer defined in the visible part of this file; name the abseil checkout directly.
+target_include_directories(_s2 SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
--- a/contrib/sysroot
+++ b/contrib/sysroot
@ -1 +1 @@
-Subproject commit e0d1b64da666afbfaa6f1ee0487c33f3fd2cd5cb
+Subproject commit b5fcabb24d28fc33024291b2c6c1abd807c7dba8
--- a/contrib/usearch
+++ b/contrib/usearch
@ -1 +1 @@
-Subproject commit f942b6f334b31716f9bdb02eb6a25fa6b222f5ba
+Subproject commit 955c6f9c11adfd89c912e0d1643d160b4e9e543f
--- a/docker/packager/packager
+++ b/docker/packager/packager
@ -325,7 +325,6 @@ def parse_env_variables(

    if additional_pkgs:
        # NOTE: This are the env for packages/build script
-        result.append("MAKE_APK=true")
        result.append("MAKE_RPM=true")
        result.append("MAKE_TGZ=true")

--- a/docker/test/integration/mysql_java_client/Dockerfile
+++ b/docker/test/integration/mysql_java_client/Dockerfile
@ -1,21 +1,15 @@
 # docker build -t clickhouse/mysql-java-client .
 # MySQL Java client docker container

-FROM ubuntu:18.04
+FROM openjdk:8-jdk-alpine

-RUN apt-get update && \
-    apt-get install -y software-properties-common build-essential openjdk-8-jdk libmysql-java curl
+RUN apk --no-cache add curl

-RUN rm -rf \
-        /var/lib/apt/lists/* \
-        /var/cache/debconf \
-        /tmp/* \
-RUN apt-get clean
-
-ARG ver=5.1.46
-RUN curl -L -o /mysql-connector-java-${ver}.jar https://repo1.maven.org/maven2/mysql/mysql-connector-java/${ver}/mysql-connector-java-${ver}.jar
-ENV CLASSPATH=$CLASSPATH:/mysql-connector-java-${ver}.jar
+# Connector/J version to download; also part of the jar file name put on the CLASSPATH.
+ARG ver=8.1.0
+# --fail makes curl exit non-zero on an HTTP error instead of saving the error page as the jar,
+# so a bad version or URL breaks the image build right here.
+RUN curl --fail -L -o /mysql-connector-j-${ver}.jar https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/${ver}/mysql-connector-j-${ver}.jar
+ENV CLASSPATH=$CLASSPATH:/mysql-connector-j-${ver}.jar

 WORKDIR /jdbc
 COPY Test.java Test.java
-RUN javac Test.java
+COPY PreparedStatementsTest.java PreparedStatementsTest.java
+RUN javac Test.java PreparedStatementsTest.java
--- a/docker/test/integration/mysql_java_client/PreparedStatementsTest.java
+++ b/docker/test/integration/mysql_java_client/PreparedStatementsTest.java
@ -0,0 +1,193 @@
+import com.mysql.cj.MysqlType;
+
+import java.sql.*;
+
+public class PreparedStatementsTest {
+    public static void main(String[] args) {
+        int i = 0;
+        String host = "127.0.0.1";
+        String port = "9004";
+        String user = "default";
+        String password = "";
+        String database = "default";
+        while (i < args.length) {
+            switch (args[i]) {
+                case "--host":
+                    host = args[++i];
+                    break;
+                case "--port":
+                    port = args[++i];
+                    break;
+                case "--user":
+                    user = args[++i];
+                    break;
+                case "--password":
+                    password = args[++i];
+                    break;
+                case "--database":
+                    database = args[++i];
+                    break;
+                default:
+                    i++;
+                    break;
+            }
+        }
+
+        // useServerPrepStmts uses COM_STMT_PREPARE and COM_STMT_EXECUTE
+        // instead of COM_QUERY which allows us to test the binary protocol
+        String jdbcUrl = String.format("jdbc:mysql://%s:%s/%s?useSSL=false&useServerPrepStmts=true",
+                host, port, database);
+
+        try {
+            Class.forName("com.mysql.cj.jdbc.Driver");
+            Connection conn = DriverManager.getConnection(jdbcUrl, user, password);
+            testSimpleDataTypes(conn);
+            testStringTypes(conn);
+            testLowCardinalityAndNullableTypes(conn);
+            testDecimalTypes(conn);
+            testMiscTypes(conn);
+            testDateTypes(conn);
+            testUnusualDateTime64Scales(conn);
+            testDateTimeTimezones(conn);
+            conn.close();
+        } catch (Exception e) {
+            e.printStackTrace();
+            System.exit(1);
+        }
+    }
+
+    private static void testSimpleDataTypes(Connection conn) throws SQLException {
+        System.out.println("### testSimpleDataTypes");
+        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_simple_data_types").executeQuery();
+        int rowNum = 1;
+        while (rs.next()) {
+            System.out.printf("Row #%d\n", rowNum++);
+            System.out.printf("%s, value: %d\n", getMysqlType(rs, "i8"), rs.getInt("i8"));
+            System.out.printf("%s, value: %d\n", getMysqlType(rs, "i16"), rs.getInt("i16"));
+            System.out.printf("%s, value: %d\n", getMysqlType(rs, "i32"), rs.getInt("i32"));
+            System.out.printf("%s, value: %d\n", getMysqlType(rs, "i64"), rs.getLong("i64"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "i128"), rs.getString("i128"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "i256"), rs.getString("i256"));
+            System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui8"), rs.getInt("ui8"));
+            System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui16"), rs.getInt("ui16"));
+            System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui32"), rs.getLong("ui32"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui64"), rs.getString("ui64"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui128"), rs.getString("ui128"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui256"), rs.getString("ui256"));
+            System.out.printf("%s, value: %f\n", getMysqlType(rs, "f32"), rs.getFloat("f32"));
+            System.out.printf("%s, value: %f\n", getMysqlType(rs, "f64"), rs.getFloat("f64"));
+            System.out.printf("%s, value: %b\n", getMysqlType(rs, "b"), rs.getBoolean("b"));
+        }
+        System.out.println();
+    }
+
+    private static void testStringTypes(Connection conn) throws SQLException {
+        System.out.println("### testStringTypes");
+        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_string_types").executeQuery();
+        int rowNum = 1;
+        while (rs.next()) {
+            System.out.printf("Row #%d\n", rowNum++);
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "s"), rs.getString("s"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "sn"), rs.getString("sn"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "lc"), rs.getString("lc"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "nlc"), rs.getString("nlc"));
+        }
+        System.out.println();
+    }
+
+    private static void testLowCardinalityAndNullableTypes(Connection conn) throws SQLException {
+        System.out.println("### testLowCardinalityAndNullableTypes");
+        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_low_cardinality_and_nullable_types").executeQuery();
+        int rowNum = 1;
+        while (rs.next()) {
+            System.out.printf("Row #%d\n", rowNum++);
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "ilc"), rs.getInt("ilc"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dlc"), rs.getDate("dlc"));
+            // NULL int is represented as zero
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "ni"), rs.getInt("ni"));
+        }
+        System.out.println();
+    }
+
+    private static void testDecimalTypes(Connection conn) throws SQLException {
+        System.out.println("### testDecimalTypes");
+        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_decimal_types").executeQuery();
+        int rowNum = 1;
+        while (rs.next()) {
+            System.out.printf("Row #%d\n", rowNum++);
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d32"), rs.getBigDecimal("d32").toPlainString());
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d64"), rs.getBigDecimal("d64").toPlainString());
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d128_native"),
+                    rs.getBigDecimal("d128_native").toPlainString());
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d128_text"), rs.getString("d128_text"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d256"), rs.getString("d256"));
+        }
+        System.out.println();
+    }
+
+    private static void testDateTypes(Connection conn) throws SQLException {
+        System.out.println("### testDateTypes");
+        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_date_types").executeQuery();
+        int rowNum = 1;
+        while (rs.next()) {
+            System.out.printf("Row #%d\n", rowNum++);
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d"), rs.getDate("d"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "d32"), rs.getDate("d32"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_3"), rs.getTimestamp("dt64_3"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_6"), rs.getTimestamp("dt64_6"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_9"), rs.getTimestamp("dt64_9"));
+        }
+        System.out.println();
+    }
+
+    private static void testUnusualDateTime64Scales(Connection conn) throws SQLException {
+        System.out.println("### testUnusualDateTime64Scales");
+        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_unusual_datetime64_scales").executeQuery();
+        int rowNum = 1;
+        while (rs.next()) {
+            System.out.printf("Row #%d\n", rowNum++);
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_0"), rs.getTimestamp("dt64_0"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_1"), rs.getTimestamp("dt64_1"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_2"), rs.getTimestamp("dt64_2"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_4"), rs.getTimestamp("dt64_4"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_5"), rs.getTimestamp("dt64_5"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_7"), rs.getTimestamp("dt64_7"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_8"), rs.getTimestamp("dt64_8"));
+        }
+        System.out.println();
+    }
+
+    private static void testDateTimeTimezones(Connection conn) throws SQLException {
+        System.out.println("### testDateTimeTimezones");
+        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_datetime_timezones").executeQuery();
+        int rowNum = 1;
+        while (rs.next()) {
+            System.out.printf("Row #%d\n", rowNum++);
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_3"), rs.getTimestamp("dt64_3"));
+        }
+        System.out.println();
+    }
+
+    private static void testMiscTypes(Connection conn) throws SQLException {
+        System.out.println("### testMiscTypes");
+        ResultSet rs = conn.prepareStatement("SELECT * FROM ps_misc_types").executeQuery();
+        int rowNum = 1;
+        while (rs.next()) {
+            System.out.printf("Row #%d\n", rowNum++);
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "a"), rs.getString("a"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "u"), rs.getString("u"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "t"), rs.getString("t"));
+            System.out.printf("%s, value: %s\n", getMysqlType(rs, "m"), rs.getString("m"));
+        }
+        System.out.println();
+    }
+
+    private static String getMysqlType(ResultSet rs, String columnLabel) throws SQLException {
+        ResultSetMetaData meta = rs.getMetaData();
+        return String.format("%s type is %s", columnLabel,
+                MysqlType.getByJdbcType(meta.getColumnType(rs.findColumn(columnLabel))));
+    }
+
+}
--- a/docker/test/integration/mysql_java_client/Test.java
+++ b/docker/test/integration/mysql_java_client/Test.java
@ -46,6 +46,7 @@ class JavaConnectorTest {
        Connection conn = null;
        Statement stmt = null;
        try {
+            Class.forName("com.mysql.cj.jdbc.Driver");
            conn = DriverManager.getConnection(jdbcUrl, user, password);
            stmt = conn.createStatement();
            stmt.executeUpdate(CREATE_TABLE_SQL);
@ -69,7 +70,7 @@ class JavaConnectorTest {

            stmt.close();
            conn.close();
-        } catch (SQLException e) {
+        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
--- a/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml
@ -3,4 +3,4 @@ services:
  java1:
    image: clickhouse/mysql-java-client:${DOCKER_MYSQL_JAVA_CLIENT_TAG:-latest}
    # to keep container running
-    command: sleep infinity
+    command: sleep 1d
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@ -394,7 +394,7 @@ do
 done

 # for each query run, prepare array of metrics from query log
-clickhouse-local --query "
+clickhouse-local --multiquery --query "
 create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
    'test text, query_index int, query_id text, version UInt8, time float');

@ -551,7 +551,7 @@ numactl --cpunodebind=all --membind=all numactl --show
 #   If the available memory falls below 2 * size, GNU parallel will suspend some of the running jobs.
 numactl --cpunodebind=all --membind=all parallel -v --joblog analyze/parallel-log.txt --memsuspend 15G --null < analyze/commands.txt 2>> analyze/errors.log

-clickhouse-local --query "
+clickhouse-local --multiquery --query "
 -- Join the metric names back to the metric statistics we've calculated, and make
 -- a denormalized table of them -- statistics for all metrics for all queries.
 -- The WITH, ARRAY JOIN and CROSS JOIN do not like each other:
@ -649,7 +649,7 @@ rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.ts
 cat analyze/errors.log >> report/errors.log ||:
 cat profile-errors.log >> report/errors.log ||:

-clickhouse-local --query "
+clickhouse-local --multiquery --query "
 create view query_display_names as select * from
    file('analyze/query-display-names.tsv', TSV,
        'test text, query_index int, query_display_name text')
@ -950,7 +950,7 @@ create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.ts
 for version in {right,left}
 do
    rm -rf data
-    clickhouse-local --query "
+    clickhouse-local --multiquery --query "
 create view query_profiles as
    with 0 as left, 1 as right
    select * from file('analyze/query-profiles.tsv', TSV,
@ -1120,7 +1120,7 @@ function report_metrics
 rm -rf metrics ||:
 mkdir metrics

-clickhouse-local --query "
+clickhouse-local --multiquery --query "
 create view right_async_metric_log as
    select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes)
    ;
@ -1180,7 +1180,7 @@ function upload_results
    # Prepare info for the CI checks table.
    rm -f ci-checks.tsv

-    clickhouse-local --query "
+    clickhouse-local --multiquery --query "
 create view queries as select * from file('report/queries.tsv', TSVWithNamesAndTypes);

 create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv')
--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@ -88,5 +88,10 @@ RUN npm install -g azurite \
 COPY run.sh /
 COPY setup_minio.sh /
 COPY setup_hdfs_minicluster.sh /
+COPY attach_gdb.lib /
+COPY utils.lib /
+
+# We store stress_tests.lib in stateless image to avoid duplication of this file in stress and upgrade tests
+COPY stress_tests.lib /

 CMD ["/bin/bash", "/run.sh"]
--- a/docker/test/stateless/attach_gdb.lib
+++ b/docker/test/stateless/attach_gdb.lib
@ -1,6 +1,6 @@
 #!/bin/bash

-source /usr/share/clickhouse-test/ci/utils.lib
+source /utils.lib

 function attach_gdb_to_clickhouse()
 {
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@ -22,10 +22,10 @@ dpkg -i package_folder/clickhouse-client_*.deb
 ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test

 # shellcheck disable=SC1091
-source /usr/share/clickhouse-test/ci/attach_gdb.lib || true  # FIXME: to not break old builds, clean on 2023-09-01
+source /attach_gdb.lib

 # shellcheck disable=SC1091
-source /usr/share/clickhouse-test/ci/utils.lib || true # FIXME: to not break old builds, clean on 2023-09-01
+source /utils.lib

 # install test configs
 /usr/share/clickhouse-test/config/install.sh
--- a/docker/test/stateless/stress_tests.lib
+++ b/docker/test/stateless/stress_tests.lib
--- a/docker/test/stateless/utils.lib
+++ b/docker/test/stateless/utils.lib
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@ -16,8 +16,8 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test

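-# Stress tests and upgrade check uses similar code that was placed
-# in a separate bash library. See tests/ci/stress_tests.lib
+# Stress tests and the upgrade check use similar code that was placed
+# in a separate bash library. See docker/test/stateless/stress_tests.lib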
-source /usr/share/clickhouse-test/ci/attach_gdb.lib
-source /usr/share/clickhouse-test/ci/stress_tests.lib
+source /attach_gdb.lib
+source /stress_tests.lib

 install_packages package_folder

--- a/docker/test/upgrade/run.sh
+++ b/docker/test/upgrade/run.sh
@ -16,8 +16,8 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre

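-# Stress tests and upgrade check uses similar code that was placed
-# in a separate bash library. See tests/ci/stress_tests.lib
+# Stress tests and the upgrade check use similar code that was placed
+# in a separate bash library. See docker/test/stateless/stress_tests.lib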
-source /usr/share/clickhouse-test/ci/attach_gdb.lib
-source /usr/share/clickhouse-test/ci/stress_tests.lib
+source /attach_gdb.lib
+source /stress_tests.lib

 azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
 ./setup_minio.sh stateless # to have a proper environment
@ -60,6 +60,12 @@ install_packages previous_release_package_folder
 # available for dump via clickhouse-local
 configure

+# async_replication setting doesn't exist on some older versions
+sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
+  | sed "/<async_replication>1<\/async_replication>/d" \
+  > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
+sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
+
 # it contains some new settings, but we can safely remove it
 rm /etc/clickhouse-server/config.d/merge_tree.xml
 rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
@ -82,6 +88,12 @@ sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
  > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
 sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml

+# async_replication setting doesn't exist on some older versions
+sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
+  | sed "/<async_replication>1<\/async_replication>/d" \
+  > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
+sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
+
 # But we still need default disk because some tables loaded only into it
 sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
  | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
--- a/docs/en/engines/database-engines/replicated.md
+++ b/docs/en/engines/database-engines/replicated.md
@ -37,6 +37,8 @@ When creating a new replica of the database, this replica creates tables by itse

 [`ALTER TABLE FREEZE|ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove the partition/part to the current replica. However, if the table itself uses a Replicated table engine, then the data will be replicated after using `ATTACH`.

+If you only need to configure a cluster without maintaining table replication, refer to the [Cluster Discovery](../../operations/cluster-discovery.md) feature.
+
 ## Usage Example {#usage-example}

 Creating a cluster with three hosts:
--- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
@ -8,7 +8,7 @@ sidebar_label: EmbeddedRocksDB

 This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/).

-## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table}
+## Creating a Table {#creating-a-table}

 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@ -85,7 +85,7 @@ You can also change any [rocksdb options](https://github.com/facebook/rocksdb/wi
 </rocksdb>
 ```

-## Supported operations {#table_engine-EmbeddedRocksDB-supported-operations}
+## Supported operations {#supported-operations}

 ### Inserts

--- a/docs/en/engines/table-engines/integrations/kafka.md
+++ b/docs/en/engines/table-engines/integrations/kafka.md
@ -14,7 +14,7 @@ Kafka lets you:
 - Organize fault-tolerant storage.
 - Process streams as they become available.

-## Creating a Table {#table_engine-kafka-creating-a-table}
+## Creating a Table {#creating-a-table}

 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
--- a/docs/en/engines/table-engines/integrations/nats.md
+++ b/docs/en/engines/table-engines/integrations/nats.md
@ -13,7 +13,7 @@ This engine allows integrating ClickHouse with [NATS](https://nats.io/).
 - Publish or subscribe to message subjects.
 - Process new messages as they become available.

-## Creating a Table {#table_engine-redisstreams-creating-a-table}
+## Creating a Table {#creating-a-table}

 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
--- a/docs/en/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/en/engines/table-engines/integrations/rabbitmq.md
@ -13,7 +13,7 @@ This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.c
 - Publish or subscribe to data flows.
 - Process streams as they become available.

-## Creating a Table {#table_engine-rabbitmq-creating-a-table}
+## Creating a Table {#creating-a-table}

 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
--- a/docs/en/engines/table-engines/integrations/s3queue.md
+++ b/docs/en/engines/table-engines/integrations/s3queue.md
@ -63,7 +63,7 @@ SETTINGS
    mode = 'ordered';
 ```

-## Settings {#s3queue-settings}
+## Settings {#settings}

 ### mode {#mode}

@ -93,7 +93,7 @@ Possible values:

 Default value: `/`.

-### s3queue_loading_retries {#s3queue_loading_retries}
+### s3queue_loading_retries {#loading_retries}

 Retry file loading up to specified number of times. By default, there are no retries.
 Possible values:
@ -102,7 +102,7 @@ Possible values:

 Default value: `0`.

-### s3queue_polling_min_timeout_ms {#s3queue_polling_min_timeout_ms}
+### s3queue_polling_min_timeout_ms {#polling_min_timeout_ms}

 Minimal timeout before next polling (in milliseconds).

@ -112,7 +112,7 @@ Possible values:

 Default value: `1000`.

-### s3queue_polling_max_timeout_ms {#s3queue_polling_max_timeout_ms}
+### s3queue_polling_max_timeout_ms {#polling_max_timeout_ms}

 Maximum timeout before next polling (in milliseconds).

@ -122,7 +122,7 @@ Possible values:

 Default value: `10000`.

-### s3queue_polling_backoff_ms {#s3queue_polling_backoff_ms}
+### s3queue_polling_backoff_ms {#polling_backoff_ms}

 Polling backoff (in milliseconds).

@ -132,7 +132,7 @@ Possible values:

 Default value: `0`.

-### s3queue_tracked_files_limit {#s3queue_tracked_files_limit}
+### s3queue_tracked_files_limit {#tracked_files_limit}

 Allows to limit the number of Zookeeper nodes if the 'unordered' mode is used, does nothing for 'ordered' mode.
 If limit reached the oldest processed files will be deleted from ZooKeeper node and processed again.
@ -143,7 +143,7 @@ Possible values:

 Default value: `1000`.

-### s3queue_tracked_file_ttl_sec {#s3queue_tracked_file_ttl_sec}
+### s3queue_tracked_file_ttl_sec {#tracked_file_ttl_sec}

 Maximum number of seconds to store processed files in ZooKeeper node (store forever by default) for 'unordered' mode, does nothing for 'ordered' mode.
 After the specified number of seconds, the file will be re-imported.
@ -154,7 +154,7 @@ Possible values:

 Default value: `0`.

-### s3queue_polling_size {#s3queue_polling_size}
+### s3queue_polling_size {#polling_size}

 Maximum files to fetch from S3 with SELECT or in background task.
 Engine takes files for processing from S3 in batches.
--- a/docs/en/engines/table-engines/mergetree-family/annindexes.md
+++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md
@ -252,7 +252,7 @@ CREATE TABLE table_with_usearch_index
 (
  id Int64,
  vectors Array(Float32),
-  INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N]
+  INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N]
 )
 ENGINE = MergeTree
 ORDER BY id;
@ -265,7 +265,7 @@ CREATE TABLE table_with_usearch_index
 (
  id Int64,
  vectors Tuple(Float32[, Float32[, ...]]),
-  INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N]
+  INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N]
 )
 ENGINE = MergeTree
 ORDER BY id;
@ -277,5 +277,8 @@ USearch currently supports two distance functions:
 - `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
  ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).

+USearch allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`. If no scalar kind
+was specified during index creation, `f16` is used as default.
+
 For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
-distance function was specified during index creation, `L2Distance` is used as default.
+distance function was specified during index creation, `L2Distance` is used as default.
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@ -1222,7 +1222,6 @@ Configuration markup:
            <account_name>account</account_name>
            <account_key>pass123</account_key>
            <metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
-            <cache_enabled>true</cache_enabled>
            <cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
            <skip_access_check>false</skip_access_check>
        </blob_storage_disk>
@ -1250,8 +1249,6 @@ Limit parameters (mainly for internal usage):

 Other parameters:
 * `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
-* `cache_enabled` - Allows to cache mark and index files on local FS. Default value is `true`.
-* `cache_path` - Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
 * `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
 * `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
 * `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
--- a/docs/en/engines/table-engines/special/keepermap.md
+++ b/docs/en/engines/table-engines/special/keepermap.md
@ -20,7 +20,7 @@ For example:

 where path can be any other valid ZooKeeper path.

-## Creating a Table {#table_engine-KeeperMap-creating-a-table}
+## Creating a Table {#creating-a-table}

 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@ -74,7 +74,7 @@ If multiple tables are created on the same ZooKeeper path, the values are persis
 As a result, it is possible to use `ON CLUSTER` clause when creating the table and sharing the data from multiple ClickHouse instances.  
 Of course, it's possible to manually run `CREATE TABLE` with same path on unrelated ClickHouse instances to have same data sharing effect.

-## Supported operations {#table_engine-KeeperMap-supported-operations}
+## Supported operations {#supported-operations}

 ### Inserts

--- a/docs/en/operations/cluster-discovery.md
+++ b/docs/en/operations/cluster-discovery.md
@ -0,0 +1,171 @@
+---
+slug: /en/operations/cluster-discovery
+sidebar_label: Cluster Discovery
+---
+# Cluster Discovery
+
+## Overview
+
+ClickHouse's Cluster Discovery feature simplifies cluster configuration by allowing nodes to automatically discover and register themselves without the need for explicit definition in the configuration files. This is especially beneficial in cases where the manual definition of each node becomes cumbersome.
+
+:::note
+
+Cluster Discovery is an experimental feature and can be changed or removed in future versions.
+To enable it, include the `allow_experimental_cluster_discovery` setting in your configuration file:
+
+```xml
+<clickhouse>
+    <!-- ... -->
+    <allow_experimental_cluster_discovery>1</allow_experimental_cluster_discovery>
+    <!-- ... -->
+</clickhouse>
+```
+:::
+
+## Remote Servers Configuration
+
+### Traditional Manual Configuration
+
+Traditionally, in ClickHouse, each shard and replica in the cluster needed to be manually specified in the configuration:
+
+```xml
+<remote_servers>
+    <cluster_name>
+        <shard>
+            <replica>
+                <host>node1</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>node2</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+        <shard>
+            <replica>
+                <host>node3</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>node4</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+    </cluster_name>
+</remote_servers>
+
+```
+
+### Using Cluster Discovery
+
+With Cluster Discovery, rather than defining each node explicitly, you simply specify a path in ZooKeeper. All nodes that register under this path in ZooKeeper will be automatically discovered and added to the cluster.
+
+```xml
+<remote_servers>
+    <cluster_name>
+        <discovery>
+            <path>/clickhouse/discovery/cluster_name</path>
+        </discovery>
+    </cluster_name>
+</remote_servers>
+```
+
+If you want to specify a shard number for a particular node, you can include the `<shard>` tag within the `<discovery>` section:
+
+for `node1` and `node2`:
+
+```xml
+<discovery>
+    <path>/clickhouse/discovery/cluster_name</path>
+    <shard>1</shard>
+</discovery>
+```
+
+for `node3` and `node4`:
+
+```xml
+<discovery>
+    <path>/clickhouse/discovery/cluster_name</path>
+    <shard>2</shard>
+</discovery>
+```
+
+### Observer mode
+
+
+Nodes configured in observer mode will not register themselves as replicas.
+They will solely observe and discover other active replicas in the cluster without actively participating.
+To enable observer mode, include the `<observer/>` tag within the `<discovery>` section:
+
+```xml
+<discovery>
+    <path>/clickhouse/discovery/cluster_name</path>
+    <observer/>
+</discovery>
+```
+
+
+## Use-Cases and Limitations
+
+As nodes are added or removed from the specified ZooKeeper path, they are automatically discovered or removed from the cluster without the need for configuration changes or server restarts.
+
+However, changes affect only cluster configuration, not the data or existing databases and tables.
+
+Consider the following example with a cluster of 3 nodes:
+
+
+```xml
+<remote_servers>
+    <default>
+        <discovery>
+            <path>/clickhouse/discovery/default_cluster</path>
+        </discovery>
+    </default>
+</remote_servers>
+```
+
+```
+SELECT * EXCEPT (default_database, errors_count, slowdowns_count, estimated_recovery_time, database_shard_name, database_replica_name)
+FROM system.clusters WHERE cluster = 'default';
+
+┌─cluster─┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name────┬─host_address─┬─port─┬─is_local─┬─user─┬─is_active─┐
+│ default │         1 │            1 │           1 │ 92d3c04025e8 │ 172.26.0.5   │ 9000 │        0 │      │      ᴺᵁᴸᴸ │
+│ default │         1 │            1 │           2 │ a6a68731c21b │ 172.26.0.4   │ 9000 │        1 │      │      ᴺᵁᴸᴸ │
+│ default │         1 │            1 │           3 │ 8e62b9cb17a1 │ 172.26.0.2   │ 9000 │        0 │      │      ᴺᵁᴸᴸ │
+└─────────┴───────────┴──────────────┴─────────────┴──────────────┴──────────────┴──────┴──────────┴──────┴───────────┘
+```
+
+```sql
+CREATE TABLE event_table ON CLUSTER default (event_time DateTime, value String)
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/event_table', '{replica}')
+ORDER BY event_time PARTITION BY toYYYYMM(event_time);
+
+INSERT INTO event_table ...
+```
+
+Then, we add a new node to the cluster by starting it with the same entry in the `remote_servers` section of its configuration file:
+
+```
+┌─cluster─┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name────┬─host_address─┬─port─┬─is_local─┬─user─┬─is_active─┐
+│ default │         1 │            1 │           1 │ 92d3c04025e8 │ 172.26.0.5   │ 9000 │        0 │      │      ᴺᵁᴸᴸ │
+│ default │         1 │            1 │           2 │ a6a68731c21b │ 172.26.0.4   │ 9000 │        1 │      │      ᴺᵁᴸᴸ │
+│ default │         1 │            1 │           3 │ 8e62b9cb17a1 │ 172.26.0.2   │ 9000 │        0 │      │      ᴺᵁᴸᴸ │
+│ default │         1 │            1 │           4 │ b0df3669b81f │ 172.26.0.6   │ 9000 │        0 │      │      ᴺᵁᴸᴸ │
+└─────────┴───────────┴──────────────┴─────────────┴──────────────┴──────────────┴──────┴──────────┴──────┴───────────┘
+```
+
+The fourth node is participating in the cluster, but table `event_table` still exists only on the first three nodes:
+
+
+```sql
+SELECT hostname(), database, table FROM clusterAllReplicas(default, system.tables) WHERE table = 'event_table' FORMAT PrettyCompactMonoBlock
+
+┌─hostname()───┬─database─┬─table───────┐
+│ a6a68731c21b │ default  │ event_table │
+│ 92d3c04025e8 │ default  │ event_table │
+│ 8e62b9cb17a1 │ default  │ event_table │
+└──────────────┴──────────┴─────────────┘
+```
+
+If you need to have tables replicated on all the nodes, you may use the [Replicated](../engines/database-engines/replicated.md) database engine as an alternative to cluster discovery.
+
--- a/docs/en/operations/query-cache.md
+++ b/docs/en/operations/query-cache.md
@ -136,8 +136,24 @@ As a result, the query cache stores for each query multiple (partial)
 result blocks. While this behavior is a good default, it can be suppressed using setting
 [query_cache_squash_partial_query_results](settings/settings.md#query-cache-squash-partial-query-results).

-Also, results of queries with non-deterministic functions such as `rand()` and `now()` are not cached. This can be overruled using
-setting [query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).
+Also, results of queries with non-deterministic functions are not cached. Such functions include
+- functions for accessing dictionaries: [`dictGet()`](../sql-reference/functions/ext-dict-functions.md#dictGet) etc.
+- [user-defined functions](../sql-reference/statements/create/function.md),
+- functions which return the current date or time: [`now()`](../sql-reference/functions/date-time-functions.md#now),
+  [`today()`](../sql-reference/functions/date-time-functions.md#today),
+  [`yesterday()`](../sql-reference/functions/date-time-functions.md#yesterday) etc.,
+- functions which return random values: [`randomString()`](../sql-reference/functions/random-functions.md#randomString),
+  [`fuzzBits()`](../sql-reference/functions/random-functions.md#fuzzBits) etc.,
+- functions whose result depends on the size and order of the internal chunks used for query processing:
+  [`nowInBlock()`](../sql-reference/functions/date-time-functions.md#nowInBlock),
+  [`rowNumberInBlock()`](../sql-reference/functions/other-functions.md#rowNumberInBlock),
+  [`runningDifference()`](../sql-reference/functions/other-functions.md#runningDifference),
+  [`blockSize()`](../sql-reference/functions/other-functions.md#blockSize) etc.,
+- functions which depend on the environment: [`currentUser()`](../sql-reference/functions/other-functions.md#currentUser),
+  [`queryID()`](../sql-reference/functions/other-functions.md#queryID),
+  [`getMacro()`](../sql-reference/functions/other-functions.md#getMacro) etc.
+Caching of results of queries with non-deterministic functions can nevertheless be forced using setting
+[query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).

 Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a
 row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@ -88,7 +88,7 @@ Default: 2

 ## background_merges_mutations_scheduling_policy

-The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. 
+The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`.

 ## background_merges_mutations_scheduling_policy

@ -583,7 +583,7 @@ Both the cache for `local_disk`, and temporary data will be stored in `/tiny_loc

 Type: String

-Default: 
+Default:

 ## thread_pool_queue_size

@ -640,7 +640,7 @@ When `/disk1` is full, temporary data will be stored on `/disk2`.
 ```
 Type: String

-Default: 
+Default:

 ## uncompressed_cache_policy

@ -835,7 +835,7 @@ List of prefixes for [custom settings](../../operations/settings/index.md#custom

 - [Custom settings](../../operations/settings/index.md#custom_settings)

-## core_dump {#server_configuration_parameters-core_dump}
+## core_dump {#core_dump}

 Configures soft limit for core dump file size.

@ -924,7 +924,7 @@ The path to the table in ZooKeeper.
 <default_replica_name>{replica}</default_replica_name>
 ```

-## dictionaries_config {#server_configuration_parameters-dictionaries_config}
+## dictionaries_config {#dictionaries_config}

 The path to the config file for dictionaries.

@ -941,7 +941,7 @@ See also “[Dictionaries](../../sql-reference/dictionaries/index.md)”.
 <dictionaries_config>*_dictionary.xml</dictionaries_config>
 ```

-## user_defined_executable_functions_config {#server_configuration_parameters-user_defined_executable_functions_config}
+## user_defined_executable_functions_config {#user_defined_executable_functions_config}

 The path to the config file for executable user defined functions.

@ -958,7 +958,7 @@ See also “[Executable User Defined Functions](../../sql-reference/functions/in
 <user_defined_executable_functions_config>*_function.xml</user_defined_executable_functions_config>
 ```

-## dictionaries_lazy_load {#server_configuration_parameters-dictionaries_lazy_load}
+## dictionaries_lazy_load {#dictionaries_lazy_load}

 Lazy loading of dictionaries.

@ -974,7 +974,7 @@ The default is `true`.
 <dictionaries_lazy_load>true</dictionaries_lazy_load>
 ```

-## format_schema_path {#server_configuration_parameters-format_schema_path}
+## format_schema_path {#format_schema_path}

 The path to the directory with the schemes for the input data, such as schemas for the [CapnProto](../../interfaces/formats.md#capnproto) format.

@ -985,7 +985,7 @@ The path to the directory with the schemes for the input data, such as schemas f
  <format_schema_path>format_schemas/</format_schema_path>
 ```

-## graphite {#server_configuration_parameters-graphite}
+## graphite {#graphite}

 Sending data to [Graphite](https://github.com/graphite-project).

@ -1019,7 +1019,7 @@ You can configure multiple `<graphite>` clauses. For instance, you can use this
 </graphite>
 ```

-## graphite_rollup {#server_configuration_parameters-graphite-rollup}
+## graphite_rollup {#graphite-rollup}

 Settings for thinning data for Graphite.

@ -1051,7 +1051,7 @@ For more details, see [GraphiteMergeTree](../../engines/table-engines/mergetree-

 The port for connecting to the server over HTTP(s).

-If `https_port` is specified, [openSSL](#server_configuration_parameters-openssl) must be configured.
+If `https_port` is specified, [openSSL](#openssl) must be configured.

 If `http_port` is specified, the OpenSSL configuration is ignored even if it is set.

@ -1061,7 +1061,7 @@ If `http_port` is specified, the OpenSSL configuration is ignored even if it is
 <https_port>9999</https_port>
 ```

-## http_server_default_response {#server_configuration_parameters-http_server_default_response}
+## http_server_default_response {#http_server_default_response}

 The page that is shown by default when you access the ClickHouse HTTP(s) server.
 The default value is “Ok.” (with a line feed at the end)
@ -1086,7 +1086,7 @@ Expired time for HSTS in seconds. The default value is 0 means clickhouse disabl
 <hsts_max_age>600000</hsts_max_age>
 ```

-## include_from {#server_configuration_parameters-include_from}
+## include_from {#include_from}

 The path to the file with substitutions.

@ -1222,7 +1222,7 @@ The number of seconds that ClickHouse waits for incoming requests before closing
 <keep_alive_timeout>10</keep_alive_timeout>
 ```

-## listen_host {#server_configuration_parameters-listen_host}
+## listen_host {#listen_host}

 Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`.

@ -1233,7 +1233,7 @@ Examples:
 <listen_host>127.0.0.1</listen_host>
 ```

-## listen_backlog {#server_configuration_parameters-listen_backlog}
+## listen_backlog {#listen_backlog}

 Backlog (queue size of pending connections) of the listen socket.

@ -1253,7 +1253,7 @@ Examples:
 <listen_backlog>4096</listen_backlog>
 ```

-## logger {#server_configuration_parameters-logger}
+## logger {#logger}

 Logging settings.

@ -1357,7 +1357,7 @@ Keys for syslog:
    Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON` otherwise.
 - format – Message format. Possible values: `bsd` and `syslog.`

-## send_crash_reports {#server_configuration_parameters-send_crash_reports}
+## send_crash_reports {#send_crash_reports}

 Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io).
 Enabling it, especially in pre-production environments, is highly appreciated.
@ -1629,7 +1629,7 @@ Default value: `0.5`.



-## merge_tree {#server_configuration_parameters-merge_tree}
+## merge_tree {#merge_tree}

 Fine tuning for tables in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).

@ -1676,7 +1676,7 @@ To disable `metric_log` setting, you should create the following file `/etc/clic
 </clickhouse>
 ```

-## replicated_merge_tree {#server_configuration_parameters-replicated_merge_tree}
+## replicated_merge_tree {#replicated_merge_tree}

 Fine tuning for tables in the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md).

@ -1692,7 +1692,7 @@ For more information, see the MergeTreeSettings.h header file.
 </replicated_merge_tree>
 ```

-## openSSL {#server_configuration_parameters-openssl}
+## openSSL {#openssl}

 SSL client/server configuration.

@ -1751,7 +1751,7 @@ Keys for server/client settings:
 </openSSL>
 ```

-## part_log {#server_configuration_parameters-part-log}
+## part_log {#part-log}

 Logging events that are associated with [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process.

@ -1791,7 +1791,7 @@ Default: false.
 </part_log>
 ```

-## path {#server_configuration_parameters-path}
+## path {#path}

 The path to the directory containing data.

@ -1805,7 +1805,7 @@ The trailing slash is mandatory.
 <path>/var/lib/clickhouse/</path>
 ```

-## Prometheus {#server_configuration_parameters-prometheus}
+## Prometheus {#prometheus}

 Exposing metrics data for scraping from [Prometheus](https://prometheus.io).

@ -1841,7 +1841,7 @@ Check (replace `127.0.0.1` with the IP addr or hostname of your ClickHouse serve
 curl 127.0.0.1:9363/metrics
 ```

-## query_log {#server_configuration_parameters-query-log}
+## query_log {#query-log}

 Setting for logging queries received with the [log_queries=1](../../operations/settings/settings.md) setting.

@ -1911,7 +1911,7 @@ Data for the query cache is allocated in DRAM. If memory is scarce, make sure to
 </query_cache>
 ```

-## query_thread_log {#server_configuration_parameters-query_thread_log}
+## query_thread_log {#query_thread_log}

 Setting for logging threads of queries received with the [log_query_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting.

@ -1948,12 +1948,12 @@ If the table does not exist, ClickHouse will create it. If the structure of the
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    <max_size_rows>1048576</max_size_rows>
    <reserved_size_rows>8192</reserved_size_rows>
-    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>  
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
    <flush_on_crash>false</flush_on_crash>
 </query_thread_log>
 ```

-## query_views_log {#server_configuration_parameters-query_views_log}
+## query_views_log {#query_views_log}

 Setting for logging views (live, materialized etc) dependant of queries received with the [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views) setting.

@ -1995,7 +1995,7 @@ If the table does not exist, ClickHouse will create it. If the structure of the
 </query_views_log>
 ```

-## text_log {#server_configuration_parameters-text_log}
+## text_log {#text_log}

 Settings for the [text_log](../../operations/system-tables/text_log.md#system_tables-text_log) system table for logging text messages.

@ -2037,7 +2037,7 @@ Default: false.
 </clickhouse>
 ```

-## trace_log {#server_configuration_parameters-trace_log}
+## trace_log {#trace_log}

 Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.

@ -2073,7 +2073,7 @@ The default server configuration file `config.xml` contains the following settin
 </trace_log>
 ```

-## asynchronous_insert_log {#server_configuration_parameters-asynchronous_insert_log}
+## asynchronous_insert_log {#asynchronous_insert_log}

 Settings for the [asynchronous_insert_log](../../operations/system-tables/asynchronous_insert_log.md#system_tables-asynchronous_insert_log) system table for logging async inserts.

@ -2112,7 +2112,7 @@ Default: false.
 </clickhouse>
 ```

-## crash_log {#server_configuration_parameters-crash_log}
+## crash_log {#crash_log}

 Settings for the [crash_log](../../operations/system-tables/crash-log.md) system table operation.

@ -2150,7 +2150,7 @@ The default server configuration file `config.xml` contains the following settin
 </crash_log>
 ```

-## backup_log {#server_configuration_parameters-backup_log}
+## backup_log {#backup_log}

 Settings for the [backup_log](../../operations/system-tables/backup_log.md) system table for logging `BACKUP` and `RESTORE` operations.

@ -2236,8 +2236,10 @@ For the value of the `incl` attribute, see the section “[Configuration files](
 **See Also**

 - [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
+- [Cluster Discovery](../../operations/cluster-discovery.md)
+- [Replicated database engine](../../engines/database-engines/replicated.md)

-## timezone {#server_configuration_parameters-timezone}
+## timezone {#timezone}

 The server’s time zone.

@ -2255,7 +2257,7 @@ The time zone is necessary for conversions between String and DateTime formats w

 - [session_timezone](../settings/settings.md#session_timezone)

-## tcp_port {#server_configuration_parameters-tcp_port}
+## tcp_port {#tcp_port}

 Port for communicating with clients over the TCP protocol.

@ -2265,9 +2267,9 @@ Port for communicating with clients over the TCP protocol.
 <tcp_port>9000</tcp_port>
 ```

-## tcp_port_secure {#server_configuration_parameters-tcp_port_secure}
+## tcp_port_secure {#tcp_port_secure}

-TCP port for secure communication with clients. Use it with [OpenSSL](#server_configuration_parameters-openssl) settings.
+TCP port for secure communication with clients. Use it with [OpenSSL](#openssl) settings.

 **Possible values**

@ -2279,7 +2281,7 @@ Positive integer.
 <tcp_port_secure>9440</tcp_port_secure>
 ```

-## mysql_port {#server_configuration_parameters-mysql_port}
+## mysql_port {#mysql_port}

 Port for communicating with clients over MySQL protocol.

@ -2293,7 +2295,7 @@ Example
 <mysql_port>9004</mysql_port>
 ```

-## postgresql_port {#server_configuration_parameters-postgresql_port}
+## postgresql_port {#postgresql_port}

 Port for communicating with clients over PostgreSQL protocol.

@ -2324,7 +2326,7 @@ Path on the local filesystem to store temporary data for processing large querie
 ```


-## user_files_path {#server_configuration_parameters-user_files_path}
+## user_files_path {#user_files_path}

 The directory with user files. Used in the table function [file()](../../sql-reference/table-functions/file.md).

@ -2334,7 +2336,7 @@ The directory with user files. Used in the table function [file()](../../sql-ref
 <user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
 ```

-## user_scripts_path {#server_configuration_parameters-user_scripts_path}
+## user_scripts_path {#user_scripts_path}

 The directory with user scripts files. Used for Executable user defined functions [Executable User Defined Functions](../../sql-reference/functions/index.md#executable-user-defined-functions).

@ -2344,7 +2346,7 @@ The directory with user scripts files. Used for Executable user defined function
 <user_scripts_path>/var/lib/clickhouse/user_scripts/</user_scripts_path>
 ```

-## user_defined_path {#server_configuration_parameters-user_defined_path}
+## user_defined_path {#user_defined_path}

 The directory with user defined files. Used for SQL user defined functions [SQL User Defined Functions](../../sql-reference/functions/index.md#user-defined-functions).

@ -2404,7 +2406,7 @@ This section contains the following parameters:
  * nearest_hostname - selects a ZooKeeper node with a hostname that is most similar to the server’s hostname.
  * first_or_random - selects the first ZooKeeper node, if it's not available then randomly selects one of remaining ZooKeeper nodes.
  * round_robin - selects the first ZooKeeper node, if reconnection happens selects the next.
-    
+
 **Example configuration**

 ``` xml
@ -2440,7 +2442,7 @@ Storage method for data part headers in ZooKeeper.

 This setting only applies to the `MergeTree` family. It can be specified:

- Globally in the [merge_tree](#server_configuration_parameters-merge_tree) section of the `config.xml` file.
+- Globally in the [merge_tree](#merge_tree) section of the `config.xml` file.

    ClickHouse uses the setting for all the tables on the server. You can change the setting at any time. Existing tables change their behaviour when the setting changes.

--- a/docs/en/operations/settings/permissions-for-queries.md
+++ b/docs/en/operations/settings/permissions-for-queries.md
@ -48,7 +48,7 @@ Setting `readonly = 1` prohibits the user from changing settings. There is a way
 :::


-## allow_ddl {#settings_allow_ddl}
+## allow_ddl {#allow_ddl}

 Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries.

--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -177,7 +177,7 @@ If `enable_optimize_predicate_expression = 1`, then the execution time of these

 If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer because the `WHERE` clause applies to all the data after the subquery finishes.

-## fallback_to_stale_replicas_for_distributed_queries {#settings-fallback_to_stale_replicas_for_distributed_queries}
+## fallback_to_stale_replicas_for_distributed_queries {#fallback_to_stale_replicas_for_distributed_queries}

 Forces a query to an out-of-date replica if updated data is not available. See [Replication](../../engines/table-engines/mergetree-family/replication.md).

@ -187,7 +187,7 @@ Used when performing `SELECT` from a distributed table that points to replicated

 By default, 1 (enabled).

-## force_index_by_date {#settings-force_index_by_date}
+## force_index_by_date {#force_index_by_date}

 Disables query execution if the index can’t be used by date.

@ -203,7 +203,7 @@ Works with tables in the MergeTree family.

 If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read. For more information about data ranges in MergeTree tables, see [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).

-## use_skip_indexes {#settings-use_skip_indexes}
+## use_skip_indexes {#use_skip_indexes}

 Use data skipping indexes during query execution.

@ -214,7 +214,7 @@ Possible values:

 Default value: 1.

-## force_data_skipping_indices {#settings-force_data_skipping_indices}
+## force_data_skipping_indices {#force_data_skipping_indices}

 Disables query execution if passed data skipping indices wasn't used.

@ -241,7 +241,7 @@ SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='`d1_
 SELECT * FROM data_01515 WHERE d1 = 0 AND assumeNotNull(d1_null) = 0 SETTINGS force_data_skipping_indices='`d1_idx`, d1_null_idx'; -- Ok.
 ```

-## ignore_data_skipping_indices {#settings-ignore_data_skipping_indices}
+## ignore_data_skipping_indices {#ignore_data_skipping_indices}

 Ignores the skipping indexes specified if used by the query.

@ -401,7 +401,7 @@ Enables or disables [fsync](http://pubs.opengroup.org/onlinepubs/9699919799/func

 It makes sense to disable it if the server has millions of tiny tables that are constantly being created and destroyed.

-## function_range_max_elements_in_block {#settings-function_range_max_elements_in_block}
+## function_range_max_elements_in_block {#function_range_max_elements_in_block}

 Sets the safety threshold for data volume generated by function [range](../../sql-reference/functions/array-functions.md/#range). Defines the maximum number of values generated by function per block of data (sum of array sizes for every row in a block).

@ -416,7 +416,7 @@ Default value: `500,000,000`.
 - [max_block_size](#setting-max_block_size)
 - [min_insert_block_size_rows](#min-insert-block-size-rows)

-## enable_http_compression {#settings-enable_http_compression}
+## enable_http_compression {#enable_http_compression}

 Enables or disables data compression in the response to an HTTP request.

@ -429,15 +429,15 @@ Possible values:

 Default value: 0.

-## http_zlib_compression_level {#settings-http_zlib_compression_level}
+## http_zlib_compression_level {#http_zlib_compression_level}

-Sets the level of data compression in the response to an HTTP request if [enable_http_compression = 1](#settings-enable_http_compression).
+Sets the level of data compression in the response to an HTTP request if [enable_http_compression = 1](#enable_http_compression).

 Possible values: Numbers from 1 to 9.

 Default value: 3.

-## http_native_compression_disable_checksumming_on_decompress {#settings-http_native_compression_disable_checksumming_on_decompress}
+## http_native_compression_disable_checksumming_on_decompress {#http_native_compression_disable_checksumming_on_decompress}

 Enables or disables checksum verification when decompressing the HTTP POST data from the client. Used only for ClickHouse native compression format (not used with `gzip` or `deflate`).

@ -480,7 +480,7 @@ Possible values:

 Default value: `1000`.

-## send_progress_in_http_headers {#settings-send_progress_in_http_headers}
+## send_progress_in_http_headers {#send_progress_in_http_headers}

 Enables or disables `X-ClickHouse-Progress` HTTP response headers in `clickhouse-server` responses.

@ -518,7 +518,7 @@ Possible values:

 Default value: `1`.

-## join_default_strictness {#settings-join_default_strictness}
+## join_default_strictness {#join_default_strictness}

 Sets default strictness for [JOIN clauses](../../sql-reference/statements/select/join.md/#select-join).

@ -531,7 +531,7 @@ Possible values:

 Default value: `ALL`.

-## join_algorithm {#settings-join_algorithm}
+## join_algorithm {#join_algorithm}

 Specifies which [JOIN](../../sql-reference/statements/select/join.md) algorithm is used.

@ -547,7 +547,7 @@ Possible values:

 [Grace hash join](https://en.wikipedia.org/wiki/Hash_join#Grace_hash_join) is used.  Grace hash provides an algorithm option that provides performant complex joins while limiting memory use.

- The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row. Any rows which don’t belong to the current bucket are flushed and reassigned.
+ The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#max_bytes_in_join)), the number of buckets is increased and the bucket assigned to each row is recalculated. Any rows which don’t belong to the current bucket are flushed and reassigned.

 Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`.

@ -588,7 +588,7 @@ Possible values:
 ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`.


-## join_any_take_last_row {#settings-join_any_take_last_row}
+## join_any_take_last_row {#join_any_take_last_row}

 Changes the behaviour of join operations with `ANY` strictness.

@ -607,7 +607,7 @@ See also:

 - [JOIN clause](../../sql-reference/statements/select/join.md/#select-join)
 - [Join table engine](../../engines/table-engines/special/join.md)
- [join_default_strictness](#settings-join_default_strictness)
+- [join_default_strictness](#join_default_strictness)

 ## join_use_nulls {#join_use_nulls}

@ -879,7 +879,7 @@ Possible values:

 Default value: 2013265920.

-## min_bytes_to_use_direct_io {#settings-min-bytes-to-use-direct-io}
+## min_bytes_to_use_direct_io {#min-bytes-to-use-direct-io}

 The minimum data volume required for using direct I/O access to the storage disk.

@ -917,7 +917,7 @@ Possible values:

 Default value: `1`.

-## log_queries {#settings-log-queries}
+## log_queries {#log-queries}

 Setting up query logging.

@ -929,7 +929,7 @@ Example:
 log_queries=1
 ```

-## log_queries_min_query_duration_ms {#settings-log-queries-min-query-duration-ms}
+## log_queries_min_query_duration_ms {#log-queries-min-query-duration-ms}

 If enabled (non-zero), queries faster than the value of this setting will not be logged (you can think about this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables:

@ -944,7 +944,7 @@ Only the queries with the following type will get to the log:
 - Type: milliseconds
 - Default value: 0 (any query)

-## log_queries_min_type {#settings-log-queries-min-type}
+## log_queries_min_type {#log-queries-min-type}

 `query_log` minimal type to log.

@ -962,11 +962,11 @@ Can be used to limit which entities will go to `query_log`, say you are interest
 log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
 ```

-## log_query_threads {#settings-log-query-threads}
+## log_query_threads {#log-query-threads}

 Setting up query threads logging.

-Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter.
+Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has effect only when [log_queries](#log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter.

 Possible values:

@ -981,7 +981,7 @@ Default value: `1`.
 log_query_threads=1
 ```

-## log_query_views {#settings-log-query-views}
+## log_query_views {#log-query-views}

 Setting up query views logging.

@ -993,7 +993,7 @@ Example:
 log_query_views=1
 ```

-## log_formatted_queries {#settings-log-formatted-queries}
+## log_formatted_queries {#log-formatted-queries}

 Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table (populates `formatted_query` column in the [system.query_log](../../operations/system-tables/query_log.md)).

@ -1004,7 +1004,7 @@ Possible values:

 Default value: `0`.

-## log_comment {#settings-log-comment}
+## log_comment {#log-comment}

 Specifies the value for the `log_comment` field of the [system.query_log](../system-tables/query_log.md) table and comment text for the server log.

@ -1012,7 +1012,7 @@ It can be used to improve the readability of server logs. Additionally, it helps

 Possible values:

- Any string no longer than [max_query_size](#settings-max_query_size). If the max_query_size is exceeded, the server throws an exception.
+- Any string no longer than [max_query_size](#max_query_size). If the max_query_size is exceeded, the server throws an exception.

 Default value: empty string.

@ -1036,7 +1036,7 @@ Result:
 └─────────────┴───────────┘
 ```

-## log_processors_profiles {#settings-log_processors_profiles}
+## log_processors_profiles {#log_processors_profiles}

 Write time that processor spent during execution/waiting for data to `system.processors_profile_log` table.

@ -1045,7 +1045,7 @@ See also:
 - [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md)
 - [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)

-## max_insert_block_size {#settings-max_insert_block_size}
+## max_insert_block_size {#max_insert_block_size}

 The size of blocks (in a count of rows) to form for insertion into a table.
 This setting only applies in cases when the server forms the blocks.
@ -1079,7 +1079,7 @@ Possible values:

 Default value: 268435456.

-## max_replica_delay_for_distributed_queries {#settings-max_replica_delay_for_distributed_queries}
+## max_replica_delay_for_distributed_queries {#max_replica_delay_for_distributed_queries}

 Disables lagging replicas for distributed queries. See [Replication](../../engines/table-engines/mergetree-family/replication.md).

@ -1096,7 +1096,7 @@ Default value: 300.

 Used when performing `SELECT` from a distributed table that points to replicated tables.

-## max_threads {#settings-max_threads}
+## max_threads {#max_threads}

 The maximum number of query processing threads, excluding threads for retrieving data from remote servers (see the ‘max_distributed_connections’ parameter).

@ -1109,7 +1109,7 @@ For queries that are completed quickly because of a LIMIT, you can set a lower

 The smaller the `max_threads` value, the less memory is consumed.

-## max_insert_threads {#settings-max-insert-threads}
+## max_insert_threads {#max-insert-threads}

 The maximum number of threads to execute the `INSERT SELECT` query.

@ -1120,7 +1120,7 @@ Possible values:

 Default value: 0.

-Parallel `INSERT SELECT` has effect only if the `SELECT` part is executed in parallel, see [max_threads](#settings-max_threads) setting.
+Parallel `INSERT SELECT` has effect only if the `SELECT` part is executed in parallel, see [max_threads](#max_threads) setting.
 Higher values will lead to higher memory usage.

 ## max_compress_block_size {#max-compress-block-size}
@ -1149,7 +1149,7 @@ We are writing a URL column with the String type (average size of 60 bytes per v
 This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.
 :::

-## max_query_size {#settings-max_query_size}
+## max_query_size {#max_query_size}

 The maximum number of bytes of a query string parsed by the SQL parser.
 Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and not affected by this restriction.
@ -1393,7 +1393,7 @@ Default value: 5000.

 ## stream_flush_interval_ms {#stream-flush-interval-ms}

-Works for tables with streaming in the case of a timeout, or when a thread generates [max_insert_block_size](#settings-max_insert_block_size) rows.
+Works for tables with streaming in the case of a timeout, or when a thread generates [max_insert_block_size](#max_insert_block_size) rows.

 The default value is 7500.

@ -1405,7 +1405,7 @@ Timeout for polling data from/to streaming storages.

 Default value: 500.

-## load_balancing {#settings-load_balancing}
+## load_balancing {#load_balancing}

 Specifies the algorithm of replicas selection that is used for distributed query processing.

@ -1419,7 +1419,7 @@ ClickHouse supports the following algorithms of choosing replicas:

 See also:

- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors)
+- [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors)

 ### Random (by Default) {#load_balancing-random}

@ -1473,20 +1473,20 @@ load_balancing = round_robin

 This algorithm uses a round-robin policy across replicas with the same number of errors (only the queries with `round_robin` policy is accounted).

-## prefer_localhost_replica {#settings-prefer-localhost-replica}
+## prefer_localhost_replica {#prefer-localhost-replica}

 Enables/disables preferable using the localhost replica when processing distributed queries.

 Possible values:

 - 1 — ClickHouse always sends a query to the localhost replica if it exists.
- 0 — ClickHouse uses the balancing strategy specified by the [load_balancing](#settings-load_balancing) setting.
+- 0 — ClickHouse uses the balancing strategy specified by the [load_balancing](#load_balancing) setting.

 Default value: 1.

 :::note
-Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) without [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key).
-If [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas.
+Disable this setting if you use [max_parallel_replicas](#max_parallel_replicas) without [parallel_replicas_custom_key](#parallel_replicas_custom_key).
+If [parallel_replicas_custom_key](#parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas.
 If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects.
 :::

@ -1500,7 +1500,7 @@ See the section “WITH TOTALS modifier”.
 The threshold for `totals_mode = 'auto'`.
 See the section “WITH TOTALS modifier”.

-## max_parallel_replicas {#settings-max_parallel_replicas}
+## max_parallel_replicas {#max_parallel_replicas}

 The maximum number of replicas for each shard when executing a query.

@ -1527,23 +1527,23 @@ A query may be processed faster if it is executed on several servers in parallel
 - The sampling key is an expression that is expensive to calculate.
 - The cluster latency distribution has a long tail, so that querying more servers increases the query overall latency.

-### Parallel processing using [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)
+### Parallel processing using [parallel_replicas_custom_key](#parallel_replicas_custom_key)

 This setting is useful for any replicated table.

-## parallel_replicas_custom_key {#settings-parallel_replicas_custom_key}
+## parallel_replicas_custom_key {#parallel_replicas_custom_key}

 An arbitrary integer expression that can be used to split work between replicas for a specific table.
 The value can be any integer expression.
-A query may be processed faster if it is executed on several servers in parallel but it depends on the used [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)
-and [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type).
+A query may be processed faster if it is executed on several servers in parallel but it depends on the used [parallel_replicas_custom_key](#parallel_replicas_custom_key)
+and [parallel_replicas_custom_key_filter_type](#parallel_replicas_custom_key_filter_type).

 Simple expressions using primary keys are preferred.

 If the setting is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards.
 Otherwise, it will behave same as for `SAMPLE` key, it will use multiple replicas of each shard.

-## parallel_replicas_custom_key_filter_type {#settings-parallel_replicas_custom_key_filter_type}
+## parallel_replicas_custom_key_filter_type {#parallel_replicas_custom_key_filter_type}

 How to use `parallel_replicas_custom_key` expression for splitting work between replicas.

@ -1732,7 +1732,7 @@ Possible values:

 Default value: 0 (no restriction).

-## insert_quorum {#settings-insert_quorum}
+## insert_quorum {#insert_quorum}

 Enables the quorum writes.

@ -1746,7 +1746,7 @@ Quorum writes

 `INSERT` succeeds only when ClickHouse manages to correctly write data to the `insert_quorum` of replicas during the `insert_quorum_timeout`. If for any reason the number of replicas with successful writes does not reach the `insert_quorum`, the write is considered failed and ClickHouse will delete the inserted block from all the replicas where data has already been written.

-When `insert_quorum_parallel` is disabled, all replicas in the quorum are consistent, i.e. they contain data from all previous `INSERT` queries (the `INSERT` sequence is linearized). When reading data written using `insert_quorum` and `insert_quorum_parallel` is disabled, you can turn on sequential consistency for `SELECT` queries using [select_sequential_consistency](#settings-select_sequential_consistency).
+When `insert_quorum_parallel` is disabled, all replicas in the quorum are consistent, i.e. they contain data from all previous `INSERT` queries (the `INSERT` sequence is linearized). When reading data written using `insert_quorum` and `insert_quorum_parallel` is disabled, you can turn on sequential consistency for `SELECT` queries using [select_sequential_consistency](#select_sequential_consistency).

 ClickHouse generates an exception:

@ -1755,11 +1755,11 @@ ClickHouse generates an exception:

 See also:

- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
- [select_sequential_consistency](#settings-select_sequential_consistency)
+- [insert_quorum_timeout](#insert_quorum_timeout)
+- [insert_quorum_parallel](#insert_quorum_parallel)
+- [select_sequential_consistency](#select_sequential_consistency)

-## insert_quorum_timeout {#settings-insert_quorum_timeout}
+## insert_quorum_timeout {#insert_quorum_timeout}

 Write to a quorum timeout in milliseconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.

@ -1767,11 +1767,11 @@ Default value: 600 000 milliseconds (ten minutes).

 See also:

- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
- [select_sequential_consistency](#settings-select_sequential_consistency)
+- [insert_quorum](#insert_quorum)
+- [insert_quorum_parallel](#insert_quorum_parallel)
+- [select_sequential_consistency](#select_sequential_consistency)

-## insert_quorum_parallel {#settings-insert_quorum_parallel}
+## insert_quorum_parallel {#insert_quorum_parallel}

 Enables or disables parallelism for quorum `INSERT` queries. If enabled, additional `INSERT` queries can be sent while previous queries have not yet finished. If disabled, additional writes to the same table will be rejected.

@ -1784,11 +1784,11 @@ Default value: 1.

 See also:

- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [select_sequential_consistency](#settings-select_sequential_consistency)
+- [insert_quorum](#insert_quorum)
+- [insert_quorum_timeout](#insert_quorum_timeout)
+- [select_sequential_consistency](#select_sequential_consistency)

-## select_sequential_consistency {#settings-select_sequential_consistency}
+## select_sequential_consistency {#select_sequential_consistency}

 Enables or disables sequential consistency for `SELECT` queries. Requires `insert_quorum_parallel` to be disabled (enabled by default).

@ -1807,11 +1807,11 @@ When `insert_quorum_parallel` is enabled (the default), then `select_sequential_

 See also:

- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
+- [insert_quorum](#insert_quorum)
+- [insert_quorum_timeout](#insert_quorum_timeout)
+- [insert_quorum_parallel](#insert_quorum_parallel)

-## insert_deduplicate {#settings-insert-deduplicate}
+## insert_deduplicate {#insert-deduplicate}

 Enables or disables block deduplication of `INSERT` (for Replicated\* tables).

@ -1938,7 +1938,7 @@ For the replicated tables, by default, only 10000 of the most recent inserts for
 We recommend enabling the [async_block_ids_cache](merge-tree-settings.md/#use-async-block-ids-cache) to increase the efficiency of deduplication.
 This function does not work for non-replicated tables.

-## deduplicate_blocks_in_dependent_materialized_views {#settings-deduplicate-blocks-in-dependent-materialized-views}
+## deduplicate_blocks_in_dependent_materialized_views {#deduplicate-blocks-in-dependent-materialized-views}

 Enables or disables the deduplication check for materialized views that receive data from Replicated\* tables.

@ -2048,7 +2048,7 @@ Possible values:

 Default value: 10000

-## max_network_bytes {#settings-max-network-bytes}
+## max_network_bytes {#max-network-bytes}

 Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query.

@ -2059,7 +2059,7 @@ Possible values:

 Default value: 0.

-## max_network_bandwidth {#settings-max-network-bandwidth}
+## max_network_bandwidth {#max-network-bandwidth}

 Limits the speed of the data exchange over the network in bytes per second. This setting applies to every query.

@ -2070,7 +2070,7 @@ Possible values:

 Default value: 0.

-## max_network_bandwidth_for_user {#settings-max-network-bandwidth-for-user}
+## max_network_bandwidth_for_user {#max-network-bandwidth-for-user}

 Limits the speed of the data exchange over the network in bytes per second. This setting applies to all concurrently running queries performed by a single user.

@ -2081,7 +2081,7 @@ Possible values:

 Default value: 0.

-## max_network_bandwidth_for_all_users {#settings-max-network-bandwidth-for-all-users}
+## max_network_bandwidth_for_all_users {#max-network-bandwidth-for-all-users}

 Limits the speed that data is exchanged at over the network in bytes per second. This setting applies to all concurrently running queries on the server.

@ -2092,7 +2092,7 @@ Possible values:

 Default value: 0.

-## count_distinct_implementation {#settings-count_distinct_implementation}
+## count_distinct_implementation {#count_distinct_implementation}

 Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction.

@ -2106,7 +2106,7 @@ Possible values:

 Default value: `uniqExact`.

-## skip_unavailable_shards {#settings-skip_unavailable_shards}
+## skip_unavailable_shards {#skip_unavailable_shards}

 Enables or disables silently skipping of unavailable shards.

@ -2270,7 +2270,7 @@ Possible values:

 Default value: 0

-## force_optimize_skip_unused_shards_nesting {#settings-force_optimize_skip_unused_shards_nesting}
+## force_optimize_skip_unused_shards_nesting {#force_optimize_skip_unused_shards_nesting}

 Controls [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards) (hence still requires [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards)) depends on the nesting level of the distributed query (case when you have `Distributed` table that look into another `Distributed` table).

@ -2400,7 +2400,7 @@ Enables caching of rows number during count from files in table functions `file`

 Enabled by default.

-## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life}
+## distributed_replica_error_half_life {#distributed_replica_error_half_life}

 - Type: seconds
 - Default value: 60 seconds
@ -2411,10 +2411,10 @@ See also:

 - [load_balancing](#load_balancing-round_robin)
 - [Table engine Distributed](../../engines/table-engines/special/distributed.md)
- [distributed_replica_error_cap](#settings-distributed_replica_error_cap)
- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors)
+- [distributed_replica_error_cap](#distributed_replica_error_cap)
+- [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors)

-## distributed_replica_error_cap {#settings-distributed_replica_error_cap}
+## distributed_replica_error_cap {#distributed_replica_error_cap}

 - Type: unsigned int
 - Default value: 1000
@ -2425,10 +2425,10 @@ See also:

 - [load_balancing](#load_balancing-round_robin)
 - [Table engine Distributed](../../engines/table-engines/special/distributed.md)
- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life)
- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors)
+- [distributed_replica_error_half_life](#distributed_replica_error_half_life)
+- [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors)

-## distributed_replica_max_ignored_errors {#settings-distributed_replica_max_ignored_errors}
+## distributed_replica_max_ignored_errors {#distributed_replica_max_ignored_errors}

 - Type: unsigned int
 - Default value: 0
@ -2439,7 +2439,7 @@ See also:

 - [load_balancing](#load_balancing-round_robin)
 - [Table engine Distributed](../../engines/table-engines/special/distributed.md)
- [distributed_replica_error_cap](#settings-distributed_replica_error_cap)
+- [distributed_replica_error_cap](#distributed_replica_error_cap)
- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life)
+- [distributed_replica_error_half_life](#distributed_replica_error_half_life)

 ## distributed_directory_monitor_sleep_time_ms {#distributed_directory_monitor_sleep_time_ms}
@ -2595,7 +2595,7 @@ Possible values:

 Default value: 0.

-## allow_introspection_functions {#settings-allow_introspection_functions}
+## allow_introspection_functions {#allow_introspection_functions}

 Enables or disables [introspection functions](../../sql-reference/functions/introspection.md) for query profiling.

@ -3136,7 +3136,7 @@ Do not enable this feature in version `<= 21.8`. It's not properly implemented a
 ## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}

 Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md/#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
-It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries.
+It is implemented via query rewrite (similar to [count_distinct_implementation](#count_distinct_implementation) setting) to get consistent results for distributed queries.

 Possible values:

@ -4609,7 +4609,7 @@ Default: 0

 ## rewrite_count_distinct_if_with_count_distinct_implementation

-Allows you to rewrite `countDistcintIf` with [count_distinct_implementation](#settings-count_distinct_implementation) setting.
+Allows you to rewrite `countDistinctIf` with [count_distinct_implementation](#count_distinct_implementation) setting.

 Possible values:

@ -4667,3 +4667,44 @@ The default value is `false`.
 ``` xml
 <validate_tcp_client_information>true</validate_tcp_client_information>
 ```
+
+## ignore_access_denied_multidirectory_globs {#ignore_access_denied_multidirectory_globs}
+
+Allows ignoring 'permission denied' errors when using multi-directory `{}` globs for [File](../../sql-reference/table-functions/file.md#globs_in_path) and [HDFS](../../sql-reference/table-functions/hdfs.md) storages.
+This setting is only applicable to multi-directory `{}` globs.
+
+Possible values: `0`, `1`.
+
+Default value: `0`.
+
+### Example
+
+Having the following structure in `user_files`:
+```
+my_directory/
+├── data1
+│   ├── f1.csv
+├── data2
+│   ├── f2.csv
+└── test_root
+```
+where the `data1` and `data2` directories are accessible, but the user has no permission to read the `test_root` directory.
+
+For a query like `SELECT *, _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV)` an exception will be thrown:
+`Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied`.  
+It happens because a multi-directory glob requires a recursive search in _all_ available directories under `my_directory`.
+
+If this setting is on, all inaccessible directories will be silently skipped, even if they are explicitly specified inside `{}`.
+
+```sql
+SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 0;
+
+Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied
+```
+```sql
+SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 1;
+
+┌─_path───────────────────┬─_file───────┐
+│ <full path to file>     │ <file name> │
+└─────────────────────────┴─────────────┘
+```
--- a/docs/en/operations/utilities/clickhouse-keeper-client.md
+++ b/docs/en/operations/utilities/clickhouse-keeper-client.md
@ -45,13 +45,13 @@ keeper foo bar
 ## Commands {#clickhouse-keeper-client-commands}

 -   `ls [path]` -- Lists the nodes for the given path (default: cwd)
-   `cd [path]` -- Change the working path (default `.`)
+-   `cd [path]` -- Changes the working path (default `.`)
 -   `exists <path>` -- Returns `1` if node exists, `0` otherwise
-   `set <path> <value> [version]` -- Updates the node's value. Only update if version matches (default: -1)
+-   `set <path> <value> [version]` -- Updates the node's value. Only updates if version matches (default: -1)
 -   `create <path> <value> [mode]` -- Creates new node with the set value
 -   `touch <path>` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists
 -   `get <path>` -- Returns the node's value
-   `remove <path>` -- Remove the node
+-   `rm <path> [version]` -- Removes the node only if version matches (default: -1)
 -   `rmr <path>` -- Recursively deletes path. Confirmation required
 -   `flwc <command>` -- Executes four-letter-word command
 -   `help` -- Prints this message
--- a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md
@ -12,7 +12,7 @@ Values can be added to the array in any (indeterminate) order.

 The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. For example, `groupArray(1)(x)` is equivalent to `[any (x)]`.

-In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`.
+In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY` if the subquery result is small enough.

 **Example**

--- a/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md
@ -10,7 +10,7 @@ Syntax: `groupArrayLast(max_size)(x)`
 Creates an array of last argument values.
 For example, `groupArrayLast(1)(x)` is equivalent to `[anyLast (x)]`.

-In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`.
+In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY` if the subquery result is small enough.

 **Example**

--- a/docs/en/sql-reference/dictionaries/index.md
+++ b/docs/en/sql-reference/dictionaries/index.md
@ -123,7 +123,7 @@ LAYOUT(...) -- Memory layout configuration
 LIFETIME(...) -- Lifetime of dictionary in memory
 ```

-## Storing Dictionaries in Memory {#storig-dictionaries-in-memory}
+## Storing Dictionaries in Memory {#storing-dictionaries-in-memory}

 There are a variety of ways to store dictionaries in memory.

--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@ -657,7 +657,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res;

 Array elements set to `NULL` are handled as normal values.

-## arraySort(\[func,\] arr, …) {#array_functions-sort}
+## arraySort(\[func,\] arr, …) {#sort}

 Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description.

@ -716,7 +716,7 @@ SELECT arraySort((x) -> -x, [1, 2, 3]) as res;
 └─────────┘
 ```

-For each element of the source array, the lambda function returns the sorting key, that is, \[1 –\> -1, 2 –\> -2, 3 –\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) –> -x` lambda function sets the [descending order](#array_functions-reverse-sort) in a sorting.
+For each element of the source array, the lambda function returns the sorting key, that is, \[1 –\> -1, 2 –\> -2, 3 –\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) –> -x` lambda function sets the [descending order](#reverse-sort) in a sorting.

 The lambda function can accept multiple arguments. In this case, you need to pass the `arraySort` function several arrays of identical length that the arguments of lambda function will correspond to. The resulting array will consist of elements from the first input array; elements from the next input array(s) specify the sorting keys. For example:

@ -762,7 +762,7 @@ To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.

 Same as `arraySort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. Remaining elements `(limit..N]` shall contain elements in unspecified order.

-## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort}
+## arrayReverseSort(\[func,\] arr, …) {#reverse-sort}

 Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description.

--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@ -239,7 +239,7 @@ int32samoa: 1546300800

 **See Also**

- [formatDateTime](#date_time_functions-formatDateTime) - supports non-constant timezone.
+- [formatDateTime](#formatDateTime) - supports non-constant timezone.
 - [toString](type-conversion-functions.md#tostring) - supports non-constant timezone.

 ## timeZoneOf
@ -983,6 +983,8 @@ Result:

 Adds the time interval or date interval to the provided date or date with time.

+If the addition results in a value outside the bounds of the data type, the result is undefined.
+
 **Syntax**

 ``` sql
@ -1006,13 +1008,13 @@ Aliases: `dateAdd`, `DATE_ADD`.
    - `year`

 - `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

 **Returned value**

 Date or date with time obtained by adding `value`, expressed in `unit`, to `date`.

-Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

 **Example**

@ -1028,10 +1030,16 @@ Result:
 └───────────────────────────────────────────────┘
 ```

+**See Also**
+
+- [addDate](#addDate)
+
 ## date\_sub

 Subtracts the time interval or date interval from the provided date or date with time.

+If the subtraction results in a value outside the bounds of the data type, the result is undefined.
+
 **Syntax**

 ``` sql
@ -1056,13 +1064,13 @@ Aliases: `dateSub`, `DATE_SUB`.
    - `year`

 - `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

 **Returned value**

 Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`.

-Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

 **Example**

@ -1078,10 +1086,15 @@ Result:
 └────────────────────────────────────────────────┘
 ```

+**See Also**
+- [subDate](#subDate)
+
 ## timestamp\_add

 Adds the specified time value with the provided date or date time value.

+If the addition results in a value outside the bounds of the data type, the result is undefined.
+
 **Syntax**

 ``` sql
@ -1092,7 +1105,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`.

 **Arguments**

- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
 - `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md).
 - `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
    Possible values:
@ -1110,7 +1123,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`.

 Date or date with time with the specified `value` expressed in `unit` added to `date`.

-Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

 **Example**

@ -1130,6 +1143,8 @@ Result:

 Subtracts the time interval from the provided date or date with time.

+If the subtraction results in a value outside the bounds of the data type, the result is undefined.
+
 **Syntax**

 ``` sql
@ -1153,13 +1168,13 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`.
    - `year`

 - `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

 **Returned value**

 Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`.

-Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

 **Example**

@ -1175,7 +1190,91 @@ Result:
 └──────────────────────────────────────────────────────────────┘
 ```

-## now
+## addDate
+
+Adds the time interval or date interval to the provided date or date with time.
+
+If the addition results in a value outside the bounds of the data type, the result is undefined.
+
+**Syntax**
+
+``` sql
+addDate(date, interval)
+```
+
+**Arguments**
+
+- `date` — The date or date with time to which `interval` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
+- `interval` — Interval to add. [Interval](../../sql-reference/data-types/special-data-types/interval.md).
+
+**Returned value**
+
+Date or date with time obtained by adding `interval` to `date`.
+
+Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
+
+**Example**
+
+```sql
+SELECT addDate(toDate('2018-01-01'), INTERVAL 3 YEAR);
+```
+
+Result:
+
+```text
+┌─addDate(toDate('2018-01-01'), toIntervalYear(3))─┐
+│                                       2021-01-01 │
+└──────────────────────────────────────────────────┘
+```
+
+Alias: `ADDDATE`
+
+**See Also**
+- [date_add](#date_add)
+
+## subDate
+
+Subtracts the time interval or date interval from the provided date or date with time.
+
+If the subtraction results in a value outside the bounds of the data type, the result is undefined.
+
+**Syntax**
+
+``` sql
+subDate(date, interval)
+```
+
+**Arguments**
+
+- `date` — The date or date with time from which `interval` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
+- `interval` — Interval to subtract. [Interval](../../sql-reference/data-types/special-data-types/interval.md).
+
+**Returned value**
+
+Date or date with time obtained by subtracting `interval` from `date`.
+
+Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
+
+**Example**
+
+```sql
+SELECT subDate(toDate('2018-01-01'), INTERVAL 3 YEAR);
+```
+
+Result:
+
+```text
+┌─subDate(toDate('2018-01-01'), toIntervalYear(3))─┐
+│                                       2015-01-01 │
+└──────────────────────────────────────────────────┘
+```
+
+Alias: `SUBDATE`
+
+**See Also**
+- [date_sub](#date_sub)
+
+## now {#now}

 Returns the current date and time at the moment of query analysis. The function is a constant expression.

@ -1262,7 +1361,7 @@ Result:
 └─────────────────────────┴───────────────────────────────┘
 ```

-## nowInBlock
+## nowInBlock {#nowInBlock}

 Returns the current date and time at the moment of processing of each block of data. In contrast to the function [now](#now), it is not a constant expression, and the returned value will be different in different blocks for long-running queries.

@ -1306,14 +1405,14 @@ Result:
 └─────────────────────┴─────────────────────┴──────────┘
 ```

-## today
+## today {#today}

 Accepts zero arguments and returns the current date at one of the moments of query analysis.
 The same as ‘toDate(now())’.

 Aliases: `curdate`, `current_date`.

-## yesterday
+## yesterday {#yesterday}

 Accepts zero arguments and returns yesterday’s date at one of the moments of query analysis.
 The same as ‘today() - 1’.
@ -1326,6 +1425,8 @@ Rounds the time to the half hour.

 Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.

+This function is the opposite of function `YYYYMMDDToDate()`.
+
 **Example**

 ``` sql
@ -1348,8 +1449,7 @@ Converts a date or date with time to a UInt32 number containing the year and mon
 **Example**

 ```sql
-SELECT
-    toYYYYMMDD(now(), 'US/Eastern')
+SELECT toYYYYMMDD(now(), 'US/Eastern')
 ```

 Result:
@ -1367,8 +1467,7 @@ Converts a date or date with time to a UInt64 number containing the year and mon
 **Example**

 ```sql
-SELECT
-    toYYYYMMDDhhmmss(now(), 'US/Eastern')
+SELECT toYYYYMMDDhhmmss(now(), 'US/Eastern')
 ```

 Result:
@ -1379,6 +1478,93 @@ Result:
 └───────────────────────────────────────┘
 ```

+## YYYYMMDDToDate
+
+Converts a number containing the year, month and day number to a [Date](../../sql-reference/data-types/date.md).
+
+This function is the opposite of function `toYYYYMMDD()`.
+
+The output is undefined if the input does not encode a valid Date value.
+
+**Syntax**
+
+```sql
+YYYYMMDDToDate(yyyymmdd);
+```
+
+**Arguments**
+
+- `yyyymmdd` - A number representing the year, month and day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
+
+**Returned value**
+
+- a date created from the arguments.
+
+Type: [Date](../../sql-reference/data-types/date.md).
+
+**Example**
+
+```sql
+SELECT YYYYMMDDToDate(20230911);
+```
+
+Result:
+
+```response
+┌─YYYYMMDDToDate(20230911)─┐
+│               2023-09-11 │
+└──────────────────────────┘
+```
+
+## YYYYMMDDToDate32
+
+Like function `YYYYMMDDToDate()` but produces a [Date32](../../sql-reference/data-types/date32.md).
+
+## YYYYMMDDhhmmssToDateTime
+
+Converts a number containing the year, month, day, hour, minute and second to a [DateTime](../../sql-reference/data-types/datetime.md).
+
+The output is undefined if the input does not encode a valid DateTime value.
+
+This function is the opposite of function `toYYYYMMDDhhmmss()`.
+
+**Syntax**
+
+```sql
+YYYYMMDDhhmmssToDateTime(yyyymmddhhmmss[, timezone]);
+```
+
+**Arguments**
+
+- `yyyymmddhhmmss` - A number representing the year, month, day, hour, minute and second. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
+- `timezone` - [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional).
+
+**Returned value**
+
+- a date with time created from the arguments.
+
+Type: [DateTime](../../sql-reference/data-types/datetime.md).
+
+**Example**
+
+```sql
+SELECT YYYYMMDDhhmmssToDateTime(20230911131415);
+```
+
+Result:
+
+```response
+┌──────YYYYMMDDhhmmssToDateTime(20230911131415)─┐
+│                           2023-09-11 13:14:15 │
+└───────────────────────────────────────────────┘
+```
+
+## YYYYMMDDhhmmssToDateTime64
+
+Like function `YYYYMMDDhhmmssToDateTime()` but produces a [DateTime64](../../sql-reference/data-types/datetime64.md).
+
+Accepts an additional, optional `precision` parameter after the `timezone` parameter.
+
 ## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters

 Function adds a Date/DateTime interval to a Date/DateTime and then return the Date/DateTime. For example:
@ -1442,7 +1628,7 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64
 └───────────────────────────────────────────────────────────────────────────────────────────────────────────┘
 ```

-## formatDateTime {#date_time_functions-formatDateTime}
+## formatDateTime {#formatDateTime}

 Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column.

@ -1567,7 +1753,7 @@ LIMIT 10
 - [formatDateTimeInJodaSyntax](#formatDateTimeInJodaSyntax)


-## formatDateTimeInJodaSyntax {#date_time_functions-formatDateTimeInJodaSyntax}
+## formatDateTimeInJodaSyntax {#formatDateTimeInJodaSyntax}

 Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.

@ -1671,7 +1857,7 @@ monthName(date)

 **Arguments**

- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

 **Returned value**

--- a/docs/en/sql-reference/functions/ext-dict-functions.md
+++ b/docs/en/sql-reference/functions/ext-dict-functions.md
@ -12,7 +12,7 @@ For dictionaries created with [DDL queries](../../sql-reference/statements/creat

 For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/index.md).

-## dictGet, dictGetOrDefault, dictGetOrNull
+## dictGet, dictGetOrDefault, dictGetOrNull {#dictGet}

 Retrieves values from a dictionary.

--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@ -19,7 +19,7 @@ halfMD5(par1, ...)
 ```

 The function is relatively slow (5 million short strings per second per processor core).
-Consider using the [sipHash64](#hash_functions-siphash64) function instead.
+Consider using the [sipHash64](#siphash64) function instead.

 **Arguments**

@ -45,13 +45,13 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')

 Calculates the MD4 from a string and returns the resulting set of bytes as FixedString(16).

-## MD5 {#hash_functions-md5}
+## MD5 {#md5}

 Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16).
 If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
 If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).

-## sipHash64 {#hash_functions-siphash64}
+## sipHash64 {#siphash64}

 Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.

@ -59,7 +59,7 @@ Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
 sipHash64(par1,...)
 ```

-This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) hash function.
+This is a cryptographic hash function. It works at least three times faster than the [MD5](#md5) hash function.

 The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:

@ -91,7 +91,7 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00

 ## sipHash64Keyed

-Same as [sipHash64](#hash_functions-siphash64) but additionally takes an explicit key argument instead of using a fixed key.
+Same as [sipHash64](#siphash64) but additionally takes an explicit key argument instead of using a fixed key.

 **Syntax**

@ -101,7 +101,7 @@ sipHash64Keyed((k0, k1), par1,...)

 **Arguments**

-Same as [sipHash64](#hash_functions-siphash64), but the first argument is a tuple of two UInt64 values representing the key.
+Same as [sipHash64](#siphash64), but the first argument is a tuple of two UInt64 values representing the key.

 **Returned value**

@ -123,12 +123,12 @@ SELECT sipHash64Keyed((506097522914230528, 1084818905618843912), array('e','x','

 ## sipHash128

-Like [sipHash64](#hash_functions-siphash64) but produces a 128-bit hash value, i.e. the final xor-folding state is done up to 128 bits.
+Like [sipHash64](#siphash64) but produces a 128-bit hash value, i.e. the final xor-folding state is done up to 128 bits.

 :::note
 This 128-bit variant differs from the reference implementation and it's weaker.
 This version exists because, when it was written, there was no official 128-bit extension for SipHash.
-New projects should probably use [sipHash128Reference](#hash_functions-siphash128reference).
+New projects should probably use [sipHash128Reference](#siphash128reference).
 :::

 **Syntax**
@ -139,7 +139,7 @@ sipHash128(par1,...)

 **Arguments**

-Same as for [sipHash64](#hash_functions-siphash64).
+Same as for [sipHash64](#siphash64).

 **Returned value**

@ -163,12 +163,12 @@ Result:

 ## sipHash128Keyed

-Same as [sipHash128](#hash_functions-siphash128) but additionally takes an explicit key argument instead of using a fixed key.
+Same as [sipHash128](#siphash128) but additionally takes an explicit key argument instead of using a fixed key.

 :::note
 This 128-bit variant differs from the reference implementation and it's weaker.
 This version exists because, when it was written, there was no official 128-bit extension for SipHash.
-New projects should probably use [sipHash128ReferenceKeyed](#hash_functions-siphash128referencekeyed).
+New projects should probably use [sipHash128ReferenceKeyed](#siphash128referencekeyed).
 :::

 **Syntax**
@ -179,7 +179,7 @@ sipHash128Keyed((k0, k1), par1,...)

 **Arguments**

-Same as [sipHash128](#hash_functions-siphash128), but the first argument is a tuple of two UInt64 values representing the key.
+Same as [sipHash128](#siphash128), but the first argument is a tuple of two UInt64 values representing the key.

 **Returned value**

@ -203,7 +203,7 @@ Result:

 ## sipHash128Reference

-Like [sipHash128](#hash_functions-siphash128) but implements the 128-bit algorithm from the original authors of SipHash.
+Like [sipHash128](#siphash128) but implements the 128-bit algorithm from the original authors of SipHash.

 **Syntax**

@ -213,7 +213,7 @@ sipHash128Reference(par1,...)

 **Arguments**

-Same as for [sipHash128](#hash_functions-siphash128).
+Same as for [sipHash128](#siphash128).

 **Returned value**

@ -237,7 +237,7 @@ Result:

 ## sipHash128ReferenceKeyed

-Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument instead of using a fixed key.
+Same as [sipHash128Reference](#siphash128reference) but additionally takes an explicit key argument instead of using a fixed key.

 **Syntax**

@ -247,7 +247,7 @@ sipHash128ReferenceKeyed((k0, k1), par1,...)

 **Arguments**

-Same as [sipHash128Reference](#hash_functions-siphash128reference), but the first argument is a tuple of two UInt64 values representing the key.
+Same as [sipHash128Reference](#siphash128reference), but the first argument is a tuple of two UInt64 values representing the key.

 **Returned value**

@ -536,7 +536,7 @@ Calculates `HiveHash` from a string.
 SELECT hiveHash('')
 ```

-This is just [JavaHash](#hash_functions-javahash) with zeroed out sign bit. This function is used in [Apache Hive](https://en.wikipedia.org/wiki/Apache_Hive) for versions before 3.0. This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
+This is just [JavaHash](#javahash) with zeroed out sign bit. This function is used in [Apache Hive](https://en.wikipedia.org/wiki/Apache_Hive) for versions before 3.0. This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.

 **Returned value**

--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@ -11,7 +11,7 @@ sidebar_label: Other
 Returns the name of the host on which this function was executed. If the function executes on a remote server (distributed processing), the remote server name is returned.
 If the function executes in the context of a distributed table, it generates a normal column with values relevant to each shard. Otherwise it produces a constant value.

-## getMacro
+## getMacro {#getMacro}

 Returns a named value from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration.

@ -186,7 +186,7 @@ Returns the type name of the passed argument.

 If `NULL` is passed, then the function returns type `Nullable(Nothing)`, which corresponds to ClickHouse's internal `NULL` representation.

-## blockSize()
+## blockSize() {#blockSize}

 In ClickHouse, queries are processed in blocks (chunks).
 This function returns the size (row count) of the block the function is called on.
@ -311,7 +311,7 @@ Sleeps ‘seconds’ seconds for each row. The sleep time can be specified as in
 Returns the name of the current database.
 Useful in table engine parameters of `CREATE TABLE` queries where you need to specify the database.

-## currentUser()
+## currentUser() {#currentUser}

 Returns the name of the current user. In case of a distributed query, the name of the user who initiated the query is returned.

@ -771,7 +771,7 @@ If executed in the context of a distributed table, this function generates a nor

 Returns the sequence number of the data block where the row is located.

-## rowNumberInBlock()
+## rowNumberInBlock() {#rowNumberInBlock}

 Returns the ordinal number of the row in the data block. Different data blocks are always recalculated.

@ -896,7 +896,7 @@ Result:
 └────────────┴───────┴───────────┴────────────────┘
 ```

-## runningDifference(x)
+## runningDifference(x) {#runningDifference}

 Calculates the difference between two consecutive row values in the data block.
 Returns 0 for the first row, and for subsequent rows the difference to the previous row.
@ -2274,7 +2274,7 @@ Result:
 └───────────────────────────┘
 ```

-## queryID
+## queryID {#queryID}

 Returns the ID of the current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `query_id`.

--- a/docs/en/sql-reference/functions/random-functions.md
+++ b/docs/en/sql-reference/functions/random-functions.md
@ -478,7 +478,7 @@ Result:
 └─────────────────────┘
 ```

-## randomString
+## randomString {#randomString}

 Generates a string of the specified length filled with random bytes (including zero bytes). Not all characters may be printable.

@ -627,7 +627,7 @@ Result:
 └──────────────────────┘
 ```

-## fuzzBits
+## fuzzBits {#fuzzBits}

 **Syntax**

--- a/docs/en/sql-reference/statements/create/user.md
+++ b/docs/en/sql-reference/statements/create/user.md
@ -90,7 +90,7 @@ In ClickHouse Cloud, by default, passwords must meet the following complexity re
    CREATE USER name3 IDENTIFIED WITH sha256_password BY 'my_password'
    ```

-    The `name3` user can now login using `my_password`, but the password is stored as the hashed value above. THe following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup:
+    The `name3` user can now login using `my_password`, but the password is stored as the hashed value above. The following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup:

    ```bash
    /var/lib/clickhouse/access $ cat 3843f510-6ebd-a52d-72ac-e021686d8a93.sql
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@ -208,7 +208,7 @@ The optional keyword `FULL` causes the output to include the collation, comment
 The statement produces a result table with the following structure:
 - field - The name of the column (String)
 - type - The column data type (String)
- null - If the column data type is Nullable (UInt8)
+- null - `YES` if the column data type is Nullable, `NO` otherwise (String)
 - key - `PRI` if the column is part of the primary key, `SOR` if the column is part of the sorting key, empty otherwise (String)
 - default - Default expression of the column if it is of type `ALIAS`, `DEFAULT`, or `MATERIALIZED`, otherwise `NULL`. (Nullable(String))
 - extra - Additional information, currently unused (String)
--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@ -340,6 +340,15 @@ After running this statement the `[db.]replicated_merge_tree_family_table_name`
 - If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed.
 - If a `PULL` modifier was specified then the query pulls new replication queue entries from ZooKeeper, but does not wait for anything to be processed.

+### SYNC DATABASE REPLICA
+
+Waits until the specified [replicated database](https://clickhouse.com/docs/en/engines/database-engines/replicated) applies all schema changes from the DDL queue of that database. 
+
+**Syntax**
+```sql
+SYSTEM SYNC DATABASE REPLICA replicated_database_name;
+```
+
 ### RESTART REPLICA

 Provides possibility to reinitialize Zookeeper session's state for `ReplicatedMergeTree` table, will compare current state with Zookeeper as source of truth and add tasks to Zookeeper queue if needed.
--- a/docs/en/sql-reference/table-functions/file.md
+++ b/docs/en/sql-reference/table-functions/file.md
@ -135,13 +135,13 @@ Getting data from table in table.csv, located in archive1.zip or/and archive2.zi
 SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv');
 ```

-## Globs in Path
+## Globs in Path {#globs_in_path}

 Multiple path components can have globs. For being processed file must exist and match to the whole path pattern (not only suffix or prefix).

 - `*` — Substitutes any number of any characters except `/` including empty string.
 - `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`.
+- `{some_string,another_string,yet_another_one}` — Substitutes any of the strings `'some_string', 'another_string', 'yet_another_one'`. If at least one of the strings contains `/`, `'permission denied'` errors may be ignored using the [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) setting for file & HDFS.
 - `{N..M}` — Substitutes any number in range from N to M including both borders.
 - `**` - Fetches all files inside the folder recursively.

@ -210,7 +210,7 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
 - [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
 - [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
 - [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.
-
+- [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs.


 **See Also**
--- a/docs/en/sql-reference/table-functions/hdfs.md
+++ b/docs/en/sql-reference/table-functions/hdfs.md
@ -39,13 +39,13 @@ LIMIT 2
 └─────────┴─────────┴─────────┘
 ```

-**Globs in path**
+## Globs in path {#globs_in_path}

 Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix).

 - `*` — Substitutes any number of any characters except `/` including empty string.
 - `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
+- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. If at least one of the strings contains `/`, `'permission denied'` errors can be ignored using the [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) setting.
 - `{N..M}` — Substitutes any number in range from N to M including both borders.

 Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)).
@ -102,6 +102,7 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
 - [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
 - [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
 - [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
+- [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs.

 **See Also**

--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@ -805,8 +805,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
            <single_read_retries>4</single_read_retries>
            <min_bytes_for_seek>1000</min_bytes_for_seek>
            <metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
-            <cache_enabled>true</cache_enabled>
-            <cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
            <skip_access_check>false</skip_access_check>
        </s3>
    </disks>
@ -832,8 +830,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
 -   `single_read_retries` — число попыток выполнения запроса в случае возникновения ошибки в процессе чтения. Значение по умолчанию: `4`.
 -   `min_bytes_for_seek` — минимальное количество байтов, которые используются для операций поиска вместо последовательного чтения. Значение по умолчанию: 1 МБайт.
 -   `metadata_path` — путь к локальному файловому хранилищу для хранения файлов с метаданными для S3. Значение по умолчанию: `/var/lib/clickhouse/disks/<disk_name>/`.
-   `cache_enabled` — признак, разрешено ли хранение кэша засечек и файлов индекса в локальной файловой системе. Значение по умолчанию: `true`.
-   `cache_path` — путь в локальной файловой системе, где будут храниться кэш засечек и файлы индекса. Значение по умолчанию: `/var/lib/clickhouse/disks/<disk_name>/cache/`.
 -   `skip_access_check` — признак, выполнять ли проверку доступов при запуске диска. Если установлено значение `true`, то проверка не выполняется. Значение по умолчанию: `false`.

 Диск S3 может быть сконфигурирован как `main` или `cold`:
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@ -4209,3 +4209,45 @@ SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_pars
 │              1.7091 │                 15008753 │
 └─────────────────────┴──────────────────────────┘
 ```
+
+## ignore_access_denied_multidirectory_globs {#ignore_access_denied_multidirectory_globs}
+
+Позволяет игнорировать ошибку 'permission denied', возникающую при использовании шаблона `{}`, содержащего `/` внутри себя.
+Работает для [File](../../sql-reference/table-functions/file.md#globs_in_path) и [HDFS](../../sql-reference/table-functions/hdfs.md).
+Работает _только_ для указанных выше шаблонов `{}`.
+
+Возможные значения: `0`, `1`.
+
+Значение по умолчанию: `0`.
+
+### Пример
+
+Пусть в `user_files` имеется следующая структура:
+```
+my_directory/
+├── data1
+│   ├── f1.csv
+├── data2
+│   ├── f2.csv
+└── test_root
+```
+Пусть также директории `data1`, `data2` могут быть прочитаны, но прав на чтение `test_root` нет.
+
+На запрос `SELECT *, _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV)` будет выброшено исключение:
+`Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied`.  
+Это происходит, так как для обработки такого шаблона необходимо выполнить рекурсивный поиск по _всем_ директориям, находящимся внутри `my_directory`.
+
+Если данная настройка имеет значение 1, то недоступные директории будут тихо пропущены, даже если они явно указаны внутри `{}`.
+
+```sql
+SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 0;
+
+Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied
+```
+```sql
+SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 1;
+
+┌─_path───────────────────┬─_file───────┐
+│ <full path to file>     │ <file name> │
+└─────────────────────────┴─────────────┘
+```
--- a/docs/ru/sql-reference/table-functions/file.md
+++ b/docs/ru/sql-reference/table-functions/file.md
@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U

 -   `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
 -   `?` — заменяет ровно один любой символ.
-   `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`.
+-   `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. В случае, если в какой-либо из строк содержится `/`, то ошибки доступа (permission denied) к существующим, но недоступным директориям/файлам могут быть проигнорированы при помощи настройки [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs).
 -   `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).

 Конструкция с `{}` аналогична табличной функции [remote](remote.md).
@ -124,6 +124,7 @@ SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String,
 -   `_path` — путь к файлу.
 -   `_file` — имя файла.

+
 **Смотрите также**

 -   [Виртуальные столбцы](index.md#table_engines-virtual_columns)
--- a/docs/ru/sql-reference/table-functions/hdfs.md
+++ b/docs/ru/sql-reference/table-functions/hdfs.md
@ -39,11 +39,11 @@ LIMIT 2
 └─────────┴─────────┴─────────┘
 ```

-**Шаблоны в пути**
+## Шаблоны поиска в компонентах пути {#globs-in-path}

 -   `*` — Заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
 -   `?` — Заменяет ровно один любой символ.
-   `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
+-   `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. В случае, если в какой-либо из строк содержится `/`, то ошибки доступа (permission denied) к существующим, но недоступным директориям/файлам могут быть проигнорированы при помощи настройки [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs).
 -   `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).

 Конструкция с `{}` аналогична табличной функции [remote](remote.md).
@ -62,3 +62,5 @@ LIMIT 2
 **Смотрите также**

 -   [Виртуальные столбцы](index.md#table_engines-virtual_columns)
+-   Параметр [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs)
+
--- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md
@ -745,8 +745,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
            <single_read_retries>4</single_read_retries>
            <min_bytes_for_seek>1000</min_bytes_for_seek>
            <metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
-            <cache_enabled>true</cache_enabled>
-            <cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
            <skip_access_check>false</skip_access_check>
        </s3>
    </disks>
@ -772,8 +770,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
 - `single_read_retries` - 读过程中连接丢失后重试次数，默认值为4。
 - `min_bytes_for_seek` - 使用查找操作，而不是顺序读操作的最小字节数，默认值为1000。
 - `metadata_path` - 本地存放S3元数据文件的路径，默认值为`/var/lib/clickhouse/disks/<disk_name>/`
- `cache_enabled` - 是否允许缓存标记和索引文件。默认值为`true`。
- `cache_path` - 本地缓存标记和索引文件的路径。默认值为`/var/lib/clickhouse/disks/<disk_name>/cache/`。
 - `skip_access_check` - 如果为`true`，Clickhouse启动时不检查磁盘是否可用。默认为`false`。
 - `server_side_encryption_customer_key_base64` - 如果指定该项的值，请求时会加上为了访问SSE-C加密数据而必须的头信息。

@ -823,4 +819,3 @@ S3磁盘也可以设置冷热存储：
 -    `_part_uuid` - 唯一部分标识符（如果 MergeTree 设置`assign_part_uuids` 已启用）。
 -    `_partition_value` — `partition by` 表达式的值（元组）。
 -    `_sample_factor` - 采样因子（来自请求）。
-
--- a/docs/zh/sql-reference/aggregate-functions/reference/stddevsamp.md
+++ b/docs/zh/sql-reference/aggregate-functions/reference/stddevsamp.md
@ -5,8 +5,8 @@ sidebar_position: 31

 # stddevSamp {#stddevsamp}

-结果等于 [varSamp] (../../../sql-reference/aggregate-functions/reference/varsamp.md)的平方根。
+结果等于 [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md) 的平方根。

 :::note
 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中，使用 `stddevSampStable` 函数。 它的工作速度较慢，但提供较低的计算错误。
-:::
+:::
--- a/packages/clickhouse-keeper.yaml
+++ b/packages/clickhouse-keeper.yaml
@ -32,10 +32,10 @@ contents:
  dst: /usr/bin/clickhouse-keeper
 - src: clickhouse-keeper.service
  dst: /lib/systemd/system/clickhouse-keeper.service
- src: clickhouse
+- src: clickhouse-keeper
  dst: /usr/bin/clickhouse-keeper-client
  type: symlink
- src: clickhouse
+- src: clickhouse-keeper
  dst: /usr/bin/clickhouse-keeper-converter
  type: symlink
 # docs
--- a/programs/diagnostics/testdata/configs/xml/config.xml
+++ b/programs/diagnostics/testdata/configs/xml/config.xml
@ -1209,8 +1209,6 @@
                <single_read_retries>4</single_read_retries>
                <min_bytes_for_seek>1000</min_bytes_for_seek>
                <metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
-                <cache_enabled>true</cache_enabled>
-                <cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
                <skip_access_check>false</skip_access_check>
            </s3>
        </disks>
--- a/programs/extract-from-config/ExtractFromConfig.cpp
+++ b/programs/extract-from-config/ExtractFromConfig.cpp
@ -18,7 +18,14 @@
 #include <Common/Exception.h>
 #include <Common/parseGlobs.h>

+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
 #include <re2/re2.h>
+#ifdef __clang__
+#  pragma clang diagnostic pop
+#endif

 static void setupLogging(const std::string & log_level)
 {
--- a/programs/git-import/git-import.cpp
+++ b/programs/git-import/git-import.cpp
@ -9,8 +9,6 @@
 #include <thread>
 #include <filesystem>

-#include <re2/re2.h>
-
 #include <boost/program_options.hpp>

 #include <Common/TerminalSize.h>
@ -26,6 +24,14 @@
 #include <IO/WriteBufferFromFile.h>
 #include <IO/WriteBufferFromFileDescriptor.h>

+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+#include <re2/re2.h>
+#ifdef __clang__
+#  pragma clang diagnostic pop
+#endif

 static constexpr auto documentation = R"(
 A tool to extract information from Git repository for analytics.
--- a/programs/keeper-client/Commands.cpp
+++ b/programs/keeper-client/Commands.cpp
@ -347,12 +347,20 @@ bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node
        return false;
    node->args.push_back(std::move(path));

+    ASTPtr version;
+    if (ParserNumber{}.parse(pos, version, expected))
+        node->args.push_back(version->as<ASTLiteral &>().value);
+
    return true;
 }

 void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
 {
-    client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet<String>()));
+    Int32 version{-1};
+    if (query->args.size() == 2)
+        version = static_cast<Int32>(query->args[1].get<Int32>());
+
+    client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet<String>()), version);
 }

 bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
@ -368,8 +376,8 @@ bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & nod
 void RMRCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
 {
    String path = client->getAbsolutePath(query->args[0].safeGet<String>());
-    client->askConfirmation("You are going to recursively delete path " + path,
-                            [client, path]{ client->zookeeper->removeRecursive(path); });
+    client->askConfirmation(
+        "You are going to recursively delete path " + path, [client, path] { client->zookeeper->removeRecursive(path); });
 }

 bool ReconfigCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
--- a/programs/keeper-client/Commands.h
+++ b/programs/keeper-client/Commands.h
@ -51,7 +51,7 @@ class CDCommand : public IKeeperClientCommand

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;

-    String getHelpMessage() const override { return "{} [path] -- Change the working path (default `.`)"; }
+    String getHelpMessage() const override { return "{} [path] -- Changes the working path (default `.`)"; }
 };

 class SetCommand : public IKeeperClientCommand
@ -64,7 +64,7 @@ class SetCommand : public IKeeperClientCommand

    String getHelpMessage() const override
    {
-        return "{} <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
+        return "{} <path> <value> [version] -- Updates the node's value. Only updates if version matches (default: -1)";
    }
 };

@ -165,7 +165,6 @@ class FindBigFamily : public IKeeperClientCommand
    }
 };

-
 class RMCommand : public IKeeperClientCommand
 {
    String getName() const override { return "rm"; }
@ -174,7 +173,7 @@ class RMCommand : public IKeeperClientCommand

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;

-    String getHelpMessage() const override { return "{} <path> -- Remove the node"; }
+    String getHelpMessage() const override { return "{} <path> [version] -- Removes the node only if version matches (default: -1)"; }
 };

 class RMRCommand : public IKeeperClientCommand
--- a/programs/keeper-client/Parser.cpp
+++ b/programs/keeper-client/Parser.cpp
@ -11,8 +11,6 @@ bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result)
    {
        if (!parseIdentifierOrStringLiteral(pos, expected, result))
            return false;
-
-        ParserToken{TokenType::Whitespace}.ignore(pos);
    }

    while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream && pos->type != TokenType::Semicolon)
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@ -572,17 +572,14 @@ void LocalServer::processConfig()
    if (!queries.empty() && config().has("queries-file"))
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");

+    if (config().has("multiquery"))
+        is_multiquery = true;
+
    delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file"));
-    if (is_interactive && !delayed_interactive)
-    {
-        if (config().has("multiquery"))
-            is_multiquery = true;
-    }
-    else
+    if (!is_interactive || delayed_interactive)
    {
        echo_queries = config().hasOption("echo") || config().hasOption("verbose");
        ignore_error = config().getBool("ignore-error", false);
-        is_multiquery = true;
    }

    print_stack_trace = config().getBool("stacktrace", false);
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -1393,7 +1393,7 @@ try
    const auto interserver_listen_hosts = getInterserverListenHosts(config());
    const auto listen_try = getListenTry(config());

-    if (config().has("keeper_server"))
+    if (config().has("keeper_server.server_id"))
    {
 #if USE_NURAFT
        //// If we don't have configured connection probably someone trying to use clickhouse-server instead
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@ -448,8 +448,6 @@
                <account_name>account</account_name>
                <account_key>pass123</account_key>
                <metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
-                <cache_enabled>true</cache_enabled>
-                <cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
                <skip_access_check>false</skip_access_check>
            </blob_storage_disk>
        </disks>
--- a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp
+++ b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp
@ -10,9 +10,17 @@
 #include <Disks/IO/createReadBufferFromFileBase.h>

 #include <boost/program_options.hpp>
-#include <re2/re2.h>
 #include <filesystem>

+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+#include <re2/re2.h>
+#ifdef __clang__
+#  pragma clang diagnostic pop
+#endif
+
 namespace fs = std::filesystem;

 #define EXTRACT_PATH_PATTERN ".*\\/store/(.*)"
--- a/src/Access/AccessControl.cpp
+++ b/src/Access/AccessControl.cpp
@ -26,10 +26,17 @@
 #include <IO/Operators.h>
 #include <Poco/AccessExpireCache.h>
 #include <boost/algorithm/string/join.hpp>
-#include <re2/re2.h>
 #include <filesystem>
 #include <mutex>

+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+#include <re2/re2.h>
+#ifdef __clang__
+#  pragma clang diagnostic pop
+#endif

 namespace DB
 {
--- a/src/Analyzer/ColumnTransformers.h
+++ b/src/Analyzer/ColumnTransformers.h
@ -1,11 +1,18 @@
 #pragma once

-#include <re2/re2.h>
-
 #include <Analyzer/Identifier.h>
 #include <Analyzer/IQueryTreeNode.h>
 #include <Analyzer/ListNode.h>

+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+#include <re2/re2.h>
+#ifdef __clang__
+#  pragma clang diagnostic pop
+#endif
+
 namespace DB
 {

--- a/src/Analyzer/MatcherNode.h
+++ b/src/Analyzer/MatcherNode.h
@ -1,12 +1,18 @@
 #pragma once

-#include <re2/re2.h>
-
 #include <Analyzer/Identifier.h>
 #include <Analyzer/IQueryTreeNode.h>
 #include <Analyzer/ColumnTransformers.h>
 #include <Parsers/ASTAsterisk.h>

+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+#include <re2/re2.h>
+#ifdef __clang__
+#  pragma clang diagnostic pop
+#endif

 namespace DB
 {
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@ -6243,11 +6243,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
        const auto & insertion_table = scope_context->getInsertionTable();
        if (!insertion_table.empty())
        {
-            const auto & insert_structure = DatabaseCatalog::instance()
-                                                .getTable(insertion_table, scope_context)
-                                                ->getInMemoryMetadataPtr()
-                                                ->getColumns()
-                                                .getInsertable();
+            const auto & insert_columns = DatabaseCatalog::instance()
+                                              .getTable(insertion_table, scope_context)
+                                              ->getInMemoryMetadataPtr()
+                                              ->getColumns();
+            const auto & insert_column_names = scope_context->hasInsertionTableColumnNames() ? *scope_context->getInsertionTableColumnNames() : insert_columns.getInsertable().getNames();
            DB::ColumnsDescription structure_hint;

            bool use_columns_from_insert_query = true;
@ -6255,8 +6255,8 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
            /// Insert table matches columns against SELECT expression by position, so we want to map
            /// insert table columns to table function columns through names from SELECT expression.

-            auto insert_column = insert_structure.begin();
-            auto insert_structure_end = insert_structure.end();  /// end iterator of the range covered by possible asterisk
+            auto insert_column_name_it = insert_column_names.begin();
+            auto insert_column_names_end = insert_column_names.end();  /// end iterator of the range covered by possible asterisk
            auto virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint();
            bool asterisk = false;
            const auto & expression_list = scope.scope_node->as<QueryNode &>().getProjection();
@ -6264,7 +6264,7 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,

            /// We want to go through SELECT expression list and correspond each expression to column in insert table
            /// which type will be used as a hint for the file structure inference.
-            for (; expression != expression_list.end() && insert_column != insert_structure_end; ++expression)
+            for (; expression != expression_list.end() && insert_column_name_it != insert_column_names_end; ++expression)
            {
                if (auto * identifier_node = (*expression)->as<IdentifierNode>())
                {
@ -6280,15 +6280,17 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
                            break;
                        }

-                        structure_hint.add({ identifier_node->getIdentifier().getFullName(), insert_column->type });
+                        ColumnDescription column = insert_columns.get(*insert_column_name_it);
+                        column.name = identifier_node->getIdentifier().getFullName();
+                        structure_hint.add(std::move(column));
                    }

                    /// Once we hit asterisk we want to find end of the range covered by asterisk
                    /// contributing every further SELECT expression to the tail of insert structure
                    if (asterisk)
-                        --insert_structure_end;
+                        --insert_column_names_end;
                    else
-                        ++insert_column;
+                        ++insert_column_name_it;
                }
                else if (auto * matcher_node = (*expression)->as<MatcherNode>(); matcher_node && matcher_node->getMatcherType() == MatcherNodeType::ASTERISK)
                {
@ -6322,18 +6324,18 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
                    /// Once we hit asterisk we want to find end of the range covered by asterisk
                    /// contributing every further SELECT expression to the tail of insert structure
                    if (asterisk)
-                        --insert_structure_end;
+                        --insert_column_names_end;
                    else
-                        ++insert_column;
+                        ++insert_column_name_it;
                }
                else
                {
                    /// Once we hit asterisk we want to find end of the range covered by asterisk
                    /// contributing every further SELECT expression to the tail of insert structure
                    if (asterisk)
-                        --insert_structure_end;
+                        --insert_column_names_end;
                    else
-                        ++insert_column;
+                        ++insert_column_name_it;
                }
            }

@ -6353,8 +6355,8 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
                    /// Append tail of insert structure to the hint
                    if (asterisk)
                    {
-                        for (; insert_column != insert_structure_end; ++insert_column)
-                            structure_hint.add({ insert_column->name, insert_column->type });
+                        for (; insert_column_name_it != insert_column_names_end; ++insert_column_name_it)
+                            structure_hint.add(insert_columns.get(*insert_column_name_it));
                    }

                    if (!structure_hint.empty())
--- a/src/Backups/BackupIO_S3.cpp
+++ b/src/Backups/BackupIO_S3.cpp
@ -49,6 +49,7 @@ namespace
            settings.auth_settings.region,
            context->getRemoteHostFilter(),
            static_cast<unsigned>(context->getGlobalContext()->getSettingsRef().s3_max_redirects),
+            static_cast<unsigned>(context->getGlobalContext()->getSettingsRef().s3_retry_attempts),
            context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
            /* for_disk_s3 = */ false, settings.request_settings.get_request_throttler, settings.request_settings.put_request_throttler,
            s3_uri.uri.getScheme());
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -316,7 +316,6 @@ target_link_libraries(clickhouse_common_io
            boost::context
            ch_contrib::cityhash
            ch_contrib::re2
-            ch_contrib::re2_st
            ch_contrib::zlib
            pcg_random
            Poco::Foundation
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@ -1071,7 +1071,9 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b
        }
        catch (const LocalFormatError &)
        {
-            local_format_error = std::current_exception();
+            /// Remember the first exception.
+            if (!local_format_error)
+                local_format_error = std::current_exception();
            connection->sendCancel();
        }
    }
--- a/src/Columns/ColumnDecimal.cpp
+++ b/src/Columns/ColumnDecimal.cpp
@ -80,7 +80,7 @@ StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, cha
        res.data = pos;
    }
    memcpy(pos, &data[n], sizeof(T));
-    return StringRef(pos, sizeof(T));
+    return res;
 }

 template <is_decimal T>
--- a/src/Columns/ColumnUnique.h
+++ b/src/Columns/ColumnUnique.h
@ -670,9 +670,8 @@ UInt128 ColumnUnique<ColumnType>::IncrementalHash::getHash(const ColumnType & co
        for (size_t i = 0; i < column_size; ++i)
            column.updateHashWithValue(i, sip_hash);

-        hash = sip_hash.get128();
-
        std::lock_guard lock(mutex);
+        hash = sip_hash.get128();
        cur_hash = hash;
        num_added_rows.store(column_size);
    }
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@ -433,7 +433,7 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
 template <typename T>
 MutableColumnPtr ColumnVector<T>::cloneResized(size_t size) const
 {
-    auto res = this->create();
+    auto res = this->create(size);

    if (size > 0)
    {
--- a/src/Columns/IColumn.h
+++ b/src/Columns/IColumn.h
@ -397,6 +397,13 @@ public:
    /// It affects performance only (not correctness).
    virtual void reserve(size_t /*n*/) {}

+    /// Requests the removal of unused capacity.
+    /// It is a non-binding request to reduce the capacity of the underlying container to its size.
+    virtual MutablePtr shrinkToFit() const
+    {
+        return cloneResized(size());
+    }
+
    /// If we have another column as a source (owner of data), copy all data to ourself and reset source.
    virtual void ensureOwnership() {}

--- a/src/Common/Allocator.cpp
+++ b/src/Common/Allocator.cpp
@ -1,24 +1,8 @@
 #include "Allocator.h"

-/** Keep definition of this constant in cpp file; otherwise its value
-  * is inlined into allocator code making it impossible to override it
-  * in third-party code.
-  *
-  * Note: extern may seem redundant, but is actually needed due to bug in GCC.
-  * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html
-  */
-#ifdef NDEBUG
-    __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20);
-#else
-    /**
-      * In debug build, use small mmap threshold to reproduce more memory
-      * stomping bugs. Along with ASLR it will hopefully detect more issues than
-      * ASan. The program may fail due to the limit on number of memory mappings.
-      *
-      * Not too small to avoid too quick exhaust of memory mappings.
-      */
-    __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384;
-#endif
+
+/// The constant is chosen almost arbitrarily: in my observations 128KB is too small, 1MB is almost indistinguishable from 64MB, and 1GB is too large.
+extern const size_t POPULATE_THRESHOLD = 16 * 1024 * 1024;

 template class Allocator<false, false>;
 template class Allocator<true, false>;
--- a/src/Common/Allocator.h
+++ b/src/Common/Allocator.h
@ -20,12 +20,6 @@
 #include <sys/mman.h>

 #include <Core/Defines.h>
-#if defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER)
-    /// Thread and memory sanitizers do not intercept mremap. The usage of
-    /// mremap will lead to false positives.
-    #define DISABLE_MREMAP 1
-#endif
-#include <base/mremap.h>
 #include <base/getPageSize.h>

 #include <Common/CurrentMemoryTracker.h>
@ -35,52 +29,33 @@

 #include <Common/Allocator_fwd.h>

+#include <base/errnoToString.h>
+#include <Poco/Logger.h>
+#include <Common/logger_useful.h>

-/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS MAP_ANON
-#endif

-/**
-  * Many modern allocators (for example, tcmalloc) do not do a mremap for
-  * realloc, even in case of large enough chunks of memory. Although this allows
-  * you to increase performance and reduce memory consumption during realloc.
-  * To fix this, we do mremap manually if the chunk of memory is large enough.
-  * The threshold (64 MB) is chosen quite large, since changing the address
-  * space is very slow, especially in the case of a large number of threads. We
-  * expect that the set of operations mmap/something to do/mremap can only be
-  * performed about 1000 times per second.
-  *
-  * P.S. This is also required, because tcmalloc can not allocate a chunk of
-  * memory greater than 16 GB.
-  *
-  * P.P.S. Note that MMAP_THRESHOLD symbol is intentionally made weak. It allows
-  * to override it during linkage when using ClickHouse as a library in
-  * third-party applications which may already use own allocator doing mmaps
-  * in the implementation of alloc/realloc.
-  */
-extern const size_t MMAP_THRESHOLD;
+extern const size_t POPULATE_THRESHOLD;

 static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;

-namespace CurrentMetrics
-{
-    extern const Metric MMappedAllocs;
-    extern const Metric MMappedAllocBytes;
-}
-
 namespace DB
 {
+
 namespace ErrorCodes
 {
-    extern const int BAD_ARGUMENTS;
    extern const int CANNOT_ALLOCATE_MEMORY;
-    extern const int CANNOT_MUNMAP;
-    extern const int CANNOT_MREMAP;
    extern const int LOGICAL_ERROR;
 }
+
 }

+/** Previously there was code that tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+).
+  * Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage of the mremap system call for large reallocs.
+  * Actually, jemalloc had support for mremap, but it was intentionally removed from the codebase: https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb.
+  * Our performance tests also show that without manual mmap/mremap/munmap ClickHouse is about 1-2% faster overall and up to 5-7x faster for some types of queries.
+  * That is why we don't do manual mmap/mremap/munmap here and rely completely on jemalloc for allocations of any size.
+  */
+
 /** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
  * Also used in hash tables.
  * The interface is different from std::allocator
@ -88,10 +63,8 @@ namespace ErrorCodes
  * - passing the size into the `free` method;
  * - by the presence of the `alignment` argument;
  * - the possibility of zeroing memory (used in hash tables);
-  * - random hint address for mmap
-  * - mmap_threshold for using mmap less or more
  */
-template <bool clear_memory_, bool mmap_populate>
+template <bool clear_memory_, bool populate>
 class Allocator
 {
 public:
@ -111,7 +84,7 @@ public:
        try
        {
            checkSize(size);
-            freeNoTrack(buf, size);
+            freeNoTrack(buf);
            auto trace = CurrentMemoryTracker::free(size);
            trace.onFree(buf, size);
        }
@ -135,8 +108,7 @@ public:
            /// nothing to do.
            /// BTW, it's not possible to change alignment while doing realloc.
        }
-        else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD
-                 && alignment <= MALLOC_MIN_ALIGNMENT)
+        else if (alignment <= MALLOC_MIN_ALIGNMENT)
        {
            /// Resize malloc'd memory region with no special alignment requirement.
            auto trace_free = CurrentMemoryTracker::free(old_size);
@ -145,7 +117,10 @@ public:

            void * new_buf = ::realloc(buf, new_size);
            if (nullptr == new_buf)
-                DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+            {
+                DB::throwFromErrno(
+                    fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+            }

            buf = new_buf;
            trace_alloc.onAlloc(buf, new_size);
@ -154,46 +129,18 @@ public:
                if (new_size > old_size)
                    memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
        }
-        else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
-        {
-            /// Resize mmap'd memory region.
-            auto trace_free = CurrentMemoryTracker::free(old_size);
-            auto trace_alloc = CurrentMemoryTracker::alloc(new_size);
-            trace_free.onFree(buf, old_size);
-
-            // On apple and freebsd self-implemented mremap used (common/mremap.h)
-            buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
-                                    PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
-            if (MAP_FAILED == buf)
-                DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
-                    ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);
-
-            /// No need for zero-fill, because mmap guarantees it.
-            trace_alloc.onAlloc(buf, new_size);
-        }
-        else if (new_size < MMAP_THRESHOLD)
-        {
-            /// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once.
-            auto trace_free = CurrentMemoryTracker::free(old_size);
-            auto trace_alloc = CurrentMemoryTracker::alloc(new_size);
-            trace_free.onFree(buf, old_size);
-
-            void * new_buf = allocNoTrack(new_size, alignment);
-            trace_alloc.onAlloc(buf, new_size);
-            memcpy(new_buf, buf, std::min(old_size, new_size));
-            freeNoTrack(buf, old_size);
-            buf = new_buf;
-        }
        else
        {
            /// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
-
            void * new_buf = alloc(new_size, alignment);
            memcpy(new_buf, buf, std::min(old_size, new_size));
            free(buf, old_size);
            buf = new_buf;
        }

+        if constexpr (populate)
+            prefaultPages(buf, new_size);
+
        return buf;
    }

@ -205,83 +152,42 @@ protected:

    static constexpr bool clear_memory = clear_memory_;

-    // Freshly mmapped pages are copy-on-write references to a global zero page.
-    // On the first write, a page fault occurs, and an actual writable page is
-    // allocated. If we are going to use this memory soon, such as when resizing
-    // hash tables, it makes sense to pre-fault the pages by passing
-    // MAP_POPULATE to mmap(). This takes some time, but should be faster
-    // overall than having a hot loop interrupted by page faults.
-    // It is only supported on Linux.
-    static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
-#if defined(OS_LINUX)
-        | (mmap_populate ? MAP_POPULATE : 0)
-#endif
-        ;
-
 private:
    void * allocNoTrack(size_t size, size_t alignment)
    {
        void * buf;
-        size_t mmap_min_alignment = ::getPageSize();
-
-        if (size >= MMAP_THRESHOLD)
+        if (alignment <= MALLOC_MIN_ALIGNMENT)
        {
-            if (alignment > mmap_min_alignment)
-                throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
-                                    "Too large alignment {}: more than page size when allocating {}.",
-                                    ReadableSize(alignment), ReadableSize(size));
+            if constexpr (clear_memory)
+                buf = ::calloc(size, 1);
+            else
+                buf = ::malloc(size);

-            buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE,
-                       mmap_flags, -1, 0);
-            if (MAP_FAILED == buf)
-                DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
-            /// No need for zero-fill, because mmap guarantees it.
-
-            CurrentMetrics::add(CurrentMetrics::MMappedAllocs);
-            CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size);
+            if (nullptr == buf)
+                DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
        }
        else
        {
-            if (alignment <= MALLOC_MIN_ALIGNMENT)
-            {
-                if constexpr (clear_memory)
-                    buf = ::calloc(size, 1);
-                else
-                    buf = ::malloc(size);
+            buf = nullptr;
+            int res = posix_memalign(&buf, alignment, size);

-                if (nullptr == buf)
-                    DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
-            }
-            else
-            {
-                buf = nullptr;
-                int res = posix_memalign(&buf, alignment, size);
+            if (0 != res)
+                DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
+                    DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);

-                if (0 != res)
-                    DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
-                        DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
-
-                if constexpr (clear_memory)
-                    memset(buf, 0, size);
-            }
+            if constexpr (clear_memory)
+                memset(buf, 0, size);
        }
+
+        if constexpr (populate)
+            prefaultPages(buf, size);
+
        return buf;
    }

-    void freeNoTrack(void * buf, size_t size)
+    void freeNoTrack(void * buf)
    {
-        if (size >= MMAP_THRESHOLD)
-        {
-            if (0 != munmap(buf, size))
-                DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP);
-
-            CurrentMetrics::sub(CurrentMetrics::MMappedAllocs);
-            CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size);
-        }
-        else
-        {
-            ::free(buf);
-        }
+        ::free(buf);
    }

    void checkSize(size_t size)
@ -291,20 +197,36 @@ private:
            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
    }

-#ifndef NDEBUG
-    /// In debug builds, request mmap() at random addresses (a kind of ASLR), to
-    /// reproduce more memory stomping bugs. Note that Linux doesn't do it by
-    /// default. This may lead to worse TLB performance.
-    void * getMmapHint()
+    /// Address passed to madvise is required to be aligned to the page boundary.
+    /// Returns the first page-aligned address at or after buf and len reduced by the skipped prefix; the caller must guarantee len >= page_size.
+    auto adjustToPageSize(void * buf, size_t len, size_t page_size)
    {
-        return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
+        const uintptr_t address_numeric = reinterpret_cast<uintptr_t>(buf);
+        const size_t next_page_start = ((address_numeric + page_size - 1) / page_size) * page_size;
+        return std::make_pair(reinterpret_cast<void *>(next_page_start), len - (next_page_start - address_numeric));
    }
-#else
-    void * getMmapHint()
+
+    /// Best-effort request to the kernel to pre-fault the pages of [buf_, buf_ + len_) for writing (MADV_POPULATE_WRITE).
+    /// Does nothing if MADV_POPULATE_WRITE is not defined at compile time or if len_ is below POPULATE_THRESHOLD.
+    /// A failed madvise is only reported through LOG_TRACE; no exception is thrown.
+    void prefaultPages([[maybe_unused]] void * buf_, [[maybe_unused]] size_t len_)
    {
-        return nullptr;
-    }
+#if defined(MADV_POPULATE_WRITE)
+        if (len_ < POPULATE_THRESHOLD)
+            return;
+
+        static const size_t page_size = ::getPageSize();
+        if (len_ < page_size) /// Rounded address should be still within [buf, buf + len).
+            return;
+
+        auto [buf, len] = adjustToPageSize(buf_, len_, page_size);
+        if (::madvise(buf, len, MADV_POPULATE_WRITE) < 0)
+            LOG_TRACE(
+                LogFrequencyLimiter(&Poco::Logger::get("Allocator"), 1),
+                "Attempt to populate pages failed: {} (EINVAL is expected for kernels < 5.14)",
+                errnoToString(errno)); /// madvise returns -1 on failure; the actual cause is in errno.
 #endif
+    }
 };


--- a/src/Common/Allocator_fwd.h
+++ b/src/Common/Allocator_fwd.h
@ -3,7 +3,7 @@
  * This file provides forward declarations for Allocator.
  */

-template <bool clear_memory_, bool mmap_populate = false>
+template <bool clear_memory_, bool populate = false>
 class Allocator;

 template <typename Base, size_t N = 64, size_t Alignment = 1>
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@ -175,8 +175,6 @@
    M(PartsInMemory, "In-memory parts.") \
    M(MMappedFiles, "Total number of mmapped files.") \
    M(MMappedFileBytes, "Sum size of mmapped file regions.") \
-    M(MMappedAllocs, "Total number of mmapped allocations") \
-    M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \
    M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \
    M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \
    M(KafkaConsumers, "Number of active Kafka consumers") \
--- a/src/Common/DateLUTImpl.h
+++ b/src/Common/DateLUTImpl.h
@ -71,14 +71,14 @@ private:
    // Same as above but select different function overloads for zero saturation.
    STRONG_TYPEDEF(UInt32, LUTIndexWithSaturation)

-    static inline LUTIndex normalizeLUTIndex(UInt32 index)
+    static LUTIndex normalizeLUTIndex(UInt32 index)
    {
        if (index >= DATE_LUT_SIZE)
            return LUTIndex(DATE_LUT_SIZE - 1);
        return LUTIndex{index};
    }

-    static inline LUTIndex normalizeLUTIndex(Int64 index)
+    static LUTIndex normalizeLUTIndex(Int64 index)
    {
        if (unlikely(index < 0))
            return LUTIndex(0);
@ -88,59 +88,59 @@ private:
    }

    template <typename T>
-    friend inline LUTIndex operator+(const LUTIndex & index, const T v)
+    friend LUTIndex operator+(const LUTIndex & index, const T v)
    {
        return normalizeLUTIndex(index.toUnderType() + UInt32(v));
    }

    template <typename T>
-    friend inline LUTIndex operator+(const T v, const LUTIndex & index)
+    friend LUTIndex operator+(const T v, const LUTIndex & index)
    {
        return normalizeLUTIndex(static_cast<Int64>(v + index.toUnderType()));
    }

-    friend inline LUTIndex operator+(const LUTIndex & index, const LUTIndex & v)
+    friend LUTIndex operator+(const LUTIndex & index, const LUTIndex & v)
    {
        return normalizeLUTIndex(static_cast<UInt32>(index.toUnderType() + v.toUnderType()));
    }

    template <typename T>
-    friend inline LUTIndex operator-(const LUTIndex & index, const T v)
+    friend LUTIndex operator-(const LUTIndex & index, const T v)
    {
        return normalizeLUTIndex(static_cast<Int64>(index.toUnderType() - UInt32(v)));
    }

    template <typename T>
-    friend inline LUTIndex operator-(const T v, const LUTIndex & index)
+    friend LUTIndex operator-(const T v, const LUTIndex & index)
    {
        return normalizeLUTIndex(static_cast<Int64>(v - index.toUnderType()));
    }

-    friend inline LUTIndex operator-(const LUTIndex & index, const LUTIndex & v)
+    friend LUTIndex operator-(const LUTIndex & index, const LUTIndex & v)
    {
        return normalizeLUTIndex(static_cast<Int64>(index.toUnderType() - v.toUnderType()));
    }

    template <typename T>
-    friend inline LUTIndex operator*(const LUTIndex & index, const T v)
+    friend LUTIndex operator*(const LUTIndex & index, const T v)
    {
        return normalizeLUTIndex(index.toUnderType() * UInt32(v));
    }

    template <typename T>
-    friend inline LUTIndex operator*(const T v, const LUTIndex & index)
+    friend LUTIndex operator*(const T v, const LUTIndex & index)
    {
        return normalizeLUTIndex(v * index.toUnderType());
    }

    template <typename T>
-    friend inline LUTIndex operator/(const LUTIndex & index, const T v)
+    friend LUTIndex operator/(const LUTIndex & index, const T v)
    {
        return normalizeLUTIndex(index.toUnderType() / UInt32(v));
    }

    template <typename T>
-    friend inline LUTIndex operator/(const T v, const LUTIndex & index)
+    friend LUTIndex operator/(const T v, const LUTIndex & index)
    {
        return normalizeLUTIndex(UInt32(v) / index.toUnderType());
    }
@ -172,12 +172,12 @@ public:
        Int8 amount_of_offset_change_value; /// Usually -4 or 4, but look at Lord Howe Island. Multiply by OffsetChangeFactor
        UInt8 time_at_offset_change_value; /// In seconds from beginning of the day. Multiply by OffsetChangeFactor

-        inline Int32 amount_of_offset_change() const /// NOLINT
+        Int32 amount_of_offset_change() const /// NOLINT
        {
            return static_cast<Int32>(amount_of_offset_change_value) * OffsetChangeFactor;
        }

-        inline UInt32 time_at_offset_change() const /// NOLINT
+        UInt32 time_at_offset_change() const /// NOLINT
        {
            return static_cast<UInt32>(time_at_offset_change_value) * OffsetChangeFactor;
        }
@ -221,7 +221,7 @@ private:
    /// Time zone name.
    std::string time_zone;

-    inline LUTIndex findIndex(Time t) const
+    LUTIndex findIndex(Time t) const
    {
        /// First guess.
        Time guess = (t / 86400) + daynum_offset_epoch;
@ -248,34 +248,34 @@ private:
        return LUTIndex(guess ? static_cast<unsigned>(guess) - 1 : 0);
    }

-    static inline LUTIndex toLUTIndex(DayNum d)
+    static LUTIndex toLUTIndex(DayNum d)
    {
        return normalizeLUTIndex(d + daynum_offset_epoch);
    }

-    static inline LUTIndex toLUTIndex(ExtendedDayNum d)
+    static LUTIndex toLUTIndex(ExtendedDayNum d)
    {
        return normalizeLUTIndex(static_cast<Int64>(d + daynum_offset_epoch));
    }

-    inline LUTIndex toLUTIndex(Time t) const
+    LUTIndex toLUTIndex(Time t) const
    {
        return findIndex(t);
    }

-    static inline LUTIndex toLUTIndex(LUTIndex i)
+    static LUTIndex toLUTIndex(LUTIndex i)
    {
        return i;
    }

    template <typename DateOrTime>
-    inline const Values & find(DateOrTime v) const
+    const Values & find(DateOrTime v) const
    {
        return lut[toLUTIndex(v)];
    }

    template <typename DateOrTime, typename Divisor>
-    inline DateOrTime roundDown(DateOrTime x, Divisor divisor) const
+    DateOrTime roundDown(DateOrTime x, Divisor divisor) const
    {
        static_assert(std::is_integral_v<DateOrTime> && std::is_integral_v<Divisor>);
        assert(divisor > 0);
@ -336,7 +336,7 @@ public:
    }

    template <typename DateOrTime>
-    inline auto toDayNum(DateOrTime v) const
+    auto toDayNum(DateOrTime v) const
    {
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
            return DayNum{static_cast<DayNum::UnderlyingType>(saturateMinus(toLUTIndex(v).toUnderType(), daynum_offset_epoch))};
@ -346,7 +346,7 @@ public:

    /// Round down to start of monday.
    template <typename DateOrTime>
-    inline Time toFirstDayOfWeek(DateOrTime v) const
+    Time toFirstDayOfWeek(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -356,7 +356,7 @@ public:
    }

    template <typename DateOrTime>
-    inline auto toFirstDayNumOfWeek(DateOrTime v) const
+    auto toFirstDayNumOfWeek(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -367,7 +367,7 @@ public:

    /// Round up to the last day of week.
    template <typename DateOrTime>
-    inline Time toLastDayOfWeek(DateOrTime v) const
+    Time toLastDayOfWeek(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -377,7 +377,7 @@ public:
    }

    template <typename DateOrTime>
-    inline auto toLastDayNumOfWeek(DateOrTime v) const
+    auto toLastDayNumOfWeek(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -388,7 +388,7 @@ public:

    /// Round down to start of month.
    template <typename DateOrTime>
-    inline Time toFirstDayOfMonth(DateOrTime v) const
+    Time toFirstDayOfMonth(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -398,7 +398,7 @@ public:
    }

    template <typename DateOrTime>
-    inline auto toFirstDayNumOfMonth(DateOrTime v) const
+    auto toFirstDayNumOfMonth(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -409,7 +409,7 @@ public:

    /// Round up to last day of month.
    template <typename DateOrTime>
-    inline Time toLastDayOfMonth(DateOrTime v) const
+    Time toLastDayOfMonth(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -419,7 +419,7 @@ public:
    }

    template <typename DateOrTime>
-    inline auto toLastDayNumOfMonth(DateOrTime v) const
+    auto toLastDayNumOfMonth(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
@ -430,7 +430,7 @@ public:

    /// Round down to start of quarter.
    template <typename DateOrTime>
-    inline auto toFirstDayNumOfQuarter(DateOrTime v) const
+    auto toFirstDayNumOfQuarter(DateOrTime v) const
    {
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
            return toDayNum(LUTIndexWithSaturation(toFirstDayOfQuarterIndex(v)));
@ -439,7 +439,7 @@ public:
    }

    template <typename DateOrTime>
-    inline LUTIndex toFirstDayOfQuarterIndex(DateOrTime v) const
+    LUTIndex toFirstDayOfQuarterIndex(DateOrTime v) const
    {
        LUTIndex index = toLUTIndex(v);
        size_t month_inside_quarter = (lut[index].month - 1) % 3;
@ -455,25 +455,25 @@ public:
    }

    template <typename DateOrTime>
-    inline Time toFirstDayOfQuarter(DateOrTime v) const
+    Time toFirstDayOfQuarter(DateOrTime v) const
    {
        return toDate(toFirstDayOfQuarterIndex(v));
    }

    /// Round down to start of year.
-    inline Time toFirstDayOfYear(Time t) const
+    Time toFirstDayOfYear(Time t) const
    {
        return lut[years_lut[lut[findIndex(t)].year - DATE_LUT_MIN_YEAR]].date;
    }

    template <typename DateOrTime>
-    inline LUTIndex toFirstDayNumOfYearIndex(DateOrTime v) const
+    LUTIndex toFirstDayNumOfYearIndex(DateOrTime v) const
    {
        return years_lut[lut[toLUTIndex(v)].year - DATE_LUT_MIN_YEAR];
    }

    template <typename DateOrTime>
-    inline auto toFirstDayNumOfYear(DateOrTime v) const
+    auto toFirstDayNumOfYear(DateOrTime v) const
    {
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
            return toDayNum(LUTIndexWithSaturation(toFirstDayNumOfYearIndex(v)));
@ -481,14 +481,14 @@ public:
            return toDayNum(LUTIndex(toFirstDayNumOfYearIndex(v)));
    }

-    inline Time toFirstDayOfNextMonth(Time t) const
+    Time toFirstDayOfNextMonth(Time t) const
    {
        LUTIndex index = findIndex(t);
        index += 32 - lut[index].day_of_month;
        return lut[index - (lut[index].day_of_month - 1)].date;
    }

-    inline Time toFirstDayOfPrevMonth(Time t) const
+    Time toFirstDayOfPrevMonth(Time t) const
    {
        LUTIndex index = findIndex(t);
        index -= lut[index].day_of_month;
@ -496,13 +496,13 @@ public:
    }

    template <typename DateOrTime>
-    inline UInt8 daysInMonth(DateOrTime value) const
+    UInt8 daysInMonth(DateOrTime value) const
    {
        const LUTIndex i = toLUTIndex(value);
        return lut[i].days_in_month;
    }

-    inline UInt8 daysInMonth(Int16 year, UInt8 month) const
+    UInt8 daysInMonth(Int16 year, UInt8 month) const
    {
        UInt16 idx = year - DATE_LUT_MIN_YEAR;
        if (unlikely(idx >= DATE_LUT_YEARS))
@ -515,12 +515,12 @@ public:

    /** Round to start of day, then shift for specified amount of days.
      */
-    inline Time toDateAndShift(Time t, Int32 days) const
+    Time toDateAndShift(Time t, Int32 days) const
    {
        return lut[findIndex(t) + days].date;
    }

-    inline Time toTime(Time t) const
+    Time toTime(Time t) const
    {
        const LUTIndex index = findIndex(t);

@ -532,7 +532,7 @@ public:
        return res - offset_at_start_of_epoch; /// Starting at 1970-01-01 00:00:00 local time.
    }

-    inline unsigned toHour(Time t) const
+    unsigned toHour(Time t) const
    {
        const LUTIndex index = findIndex(t);

@ -552,7 +552,7 @@ public:
      * then subtract the former from the latter to get the offset result.
      * The boundaries when meets DST(daylight saving time) change should be handled very carefully.
      */
-    inline Time timezoneOffset(Time t) const
+    Time timezoneOffset(Time t) const
    {
        const LUTIndex index = findIndex(t);

@ -574,7 +574,7 @@ public:
    }


-    inline unsigned toSecond(Time t) const
+    unsigned toSecond(Time t) const
    {
        if (likely(offset_is_whole_number_of_minutes_during_epoch))
        {
@ -593,7 +593,7 @@ public:
        return time % 60;
    }

-    inline unsigned toMinute(Time t) const
+    unsigned toMinute(Time t) const
    {
        if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
            return (t / 60) % 60;
@ -630,11 +630,11 @@ public:
      *  because the same calendar day starts/ends at different timestamps in different time zones)
      */

-    inline Time fromDayNum(DayNum d) const { return lut_saturated[toLUTIndex(d)].date; }
-    inline Time fromDayNum(ExtendedDayNum d) const { return lut[toLUTIndex(d)].date; }
+    Time fromDayNum(DayNum d) const { return lut_saturated[toLUTIndex(d)].date; }
+    Time fromDayNum(ExtendedDayNum d) const { return lut[toLUTIndex(d)].date; }

    template <typename DateOrTime>
-    inline Time toDate(DateOrTime v) const
+    Time toDate(DateOrTime v) const
    {
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
            return lut_saturated[toLUTIndex(v)].date;
@ -643,20 +643,20 @@ public:
    }

    template <typename DateOrTime>
-    inline UInt8 toMonth(DateOrTime v) const { return lut[toLUTIndex(v)].month; }
+    UInt8 toMonth(DateOrTime v) const { return lut[toLUTIndex(v)].month; }

    template <typename DateOrTime>
-    inline UInt8 toQuarter(DateOrTime v) const { return (lut[toLUTIndex(v)].month - 1) / 3 + 1; }
+    UInt8 toQuarter(DateOrTime v) const { return (lut[toLUTIndex(v)].month - 1) / 3 + 1; }

    template <typename DateOrTime>
-    inline Int16 toYear(DateOrTime v) const { return lut[toLUTIndex(v)].year; }
+    Int16 toYear(DateOrTime v) const { return lut[toLUTIndex(v)].year; }

    /// 1-based, starts on Monday
    template <typename DateOrTime>
-    inline UInt8 toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; }
+    UInt8 toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; }

    template <typename DateOrTime>
-    inline UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const
+    UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const
    {
        WeekDayMode mode = check_week_day_mode(week_day_mode);

@ -674,10 +674,10 @@ public:
    }

    template <typename DateOrTime>
-    inline UInt8 toDayOfMonth(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_month; }
+    UInt8 toDayOfMonth(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_month; }

    template <typename DateOrTime>
-    inline UInt16 toDayOfYear(DateOrTime v) const
+    UInt16 toDayOfYear(DateOrTime v) const
    {
        // TODO: different overload for ExtendedDayNum
        const LUTIndex i = toLUTIndex(v);
@ -688,7 +688,7 @@ public:
    /// (round down to monday and divide DayNum by 7; we made an assumption,
    ///  that in domain of the function there was no weeks with any other number of days than 7)
    template <typename DateOrTime>
-    inline Int32 toRelativeWeekNum(DateOrTime v) const
+    Int32 toRelativeWeekNum(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        /// We add 8 to avoid underflow at beginning of unix epoch.
@ -697,7 +697,7 @@ public:

    /// Get year that contains most of the current week. Week begins at monday.
    template <typename DateOrTime>
-    inline Int16 toISOYear(DateOrTime v) const
+    Int16 toISOYear(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        /// That's effectively the year of thursday of current week.
@ -708,7 +708,7 @@ public:
    /// Example: ISO year 2019 begins at 2018-12-31. And ISO year 2017 begins at 2017-01-02.
    /// https://en.wikipedia.org/wiki/ISO_week_date
    template <typename DateOrTime>
-    inline LUTIndex toFirstDayNumOfISOYearIndex(DateOrTime v) const
+    LUTIndex toFirstDayNumOfISOYearIndex(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        auto iso_year = toISOYear(i);
@ -722,7 +722,7 @@ public:
    }

    template <typename DateOrTime>
-    inline auto toFirstDayNumOfISOYear(DateOrTime v) const
+    auto toFirstDayNumOfISOYear(DateOrTime v) const
    {
        if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
            return toDayNum(LUTIndexWithSaturation(toFirstDayNumOfISOYearIndex(v)));
@ -730,7 +730,7 @@ public:
            return toDayNum(LUTIndex(toFirstDayNumOfISOYearIndex(v)));
    }

-    inline Time toFirstDayOfISOYear(Time t) const
+    Time toFirstDayOfISOYear(Time t) const
    {
        return lut[toFirstDayNumOfISOYearIndex(t)].date;
    }
@ -738,7 +738,7 @@ public:
    /// ISO 8601 week number. Week begins at monday.
    /// The week number 1 is the first week in year that contains 4 or more days (that's more than half).
    template <typename DateOrTime>
-    inline UInt8 toISOWeek(DateOrTime v) const
+    UInt8 toISOWeek(DateOrTime v) const
    {
        return 1 + (toFirstDayNumOfWeek(v) - toDayNum(toFirstDayNumOfISOYearIndex(v))) / 7;
    }
@ -777,7 +777,7 @@ public:
        next week is week 1.
    */
    template <typename DateOrTime>
-    inline YearWeek toYearWeek(DateOrTime v, UInt8 week_mode) const
+    YearWeek toYearWeek(DateOrTime v, UInt8 week_mode) const
    {
        const bool newyear_day_mode = week_mode & static_cast<UInt8>(WeekModeFlag::NEWYEAR_DAY);
        week_mode = check_week_mode(week_mode);
@ -836,7 +836,7 @@ public:
    /// Calculate week number of WeekModeFlag::NEWYEAR_DAY mode
    /// The week number 1 is the first week in year that contains January 1,
    template <typename DateOrTime>
-    inline YearWeek toYearWeekOfNewyearMode(DateOrTime v, bool monday_first_mode) const
+    YearWeek toYearWeekOfNewyearMode(DateOrTime v, bool monday_first_mode) const
    {
        YearWeek yw(0, 0);
        UInt16 offset_day = monday_first_mode ? 0U : 1U;
@ -870,7 +870,7 @@ public:

    /// Get first day of week with week_mode, return Sunday or Monday
    template <typename DateOrTime>
-    inline auto toFirstDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
+    auto toFirstDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
    {
        bool monday_first_mode = week_mode & static_cast<UInt8>(WeekModeFlag::MONDAY_FIRST);
        if (monday_first_mode)
@ -889,7 +889,7 @@ public:

    /// Get last day of week with week_mode, return Saturday or Sunday
    template <typename DateOrTime>
-    inline auto toLastDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
+    auto toLastDayNumOfWeek(DateOrTime v, UInt8 week_mode) const
    {
        bool monday_first_mode = week_mode & static_cast<UInt8>(WeekModeFlag::MONDAY_FIRST);
        if (monday_first_mode)
@ -908,7 +908,7 @@ public:
    }

    /// Check and change mode to effective.
-    inline UInt8 check_week_mode(UInt8 mode) const /// NOLINT
+    UInt8 check_week_mode(UInt8 mode) const /// NOLINT
    {
        UInt8 week_format = (mode & 7);
        if (!(week_format & static_cast<UInt8>(WeekModeFlag::MONDAY_FIRST)))
@ -917,7 +917,7 @@ public:
    }

    /// Check and change mode to effective.
-    inline WeekDayMode check_week_day_mode(UInt8 mode) const /// NOLINT
+    WeekDayMode check_week_day_mode(UInt8 mode) const /// NOLINT
    {
        return static_cast<WeekDayMode>(mode & 3);
    }
@ -926,7 +926,7 @@ public:
      * Returns 0 for monday, 1 for tuesday...
      */
    template <typename DateOrTime>
-    inline UInt8 calc_weekday(DateOrTime v, bool sunday_first_day_of_week) const /// NOLINT
+    UInt8 calc_weekday(DateOrTime v, bool sunday_first_day_of_week) const /// NOLINT
    {
        const LUTIndex i = toLUTIndex(v);
        if (!sunday_first_day_of_week)
@ -936,28 +936,28 @@ public:
    }

    /// Calculate days in one year.
-    inline UInt16 calc_days_in_year(Int32 year) const /// NOLINT
+    UInt16 calc_days_in_year(Int32 year) const /// NOLINT
    {
        return ((year & 3) == 0 && (year % 100 || (year % 400 == 0 && year)) ? 366 : 365);
    }

    /// Number of month from some fixed moment in the past (year * 12 + month)
    template <typename DateOrTime>
-    inline Int32 toRelativeMonthNum(DateOrTime v) const
+    Int32 toRelativeMonthNum(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        return lut[i].year * 12 + lut[i].month;
    }

    template <typename DateOrTime>
-    inline Int32 toRelativeQuarterNum(DateOrTime v) const
+    Int32 toRelativeQuarterNum(DateOrTime v) const
    {
        const LUTIndex i = toLUTIndex(v);
        return lut[i].year * 4 + (lut[i].month - 1) / 3;
    }

    /// We count all hour-length intervals, unrelated to offset changes.
-    inline Time toRelativeHourNum(Time t) const
+    Time toRelativeHourNum(Time t) const
    {
        if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
            return t / 3600;
@ -968,37 +968,37 @@ public:
    }

    template <typename DateOrTime>
-    inline Time toRelativeHourNum(DateOrTime v) const
+    Time toRelativeHourNum(DateOrTime v) const
    {
        return toRelativeHourNum(lut[toLUTIndex(v)].date);
    }

    /// The same formula is used for positive time (after Unix epoch) and negative time (before Unix epoch).
    /// It’s needed for correct work of dateDiff function.
-    inline Time toStableRelativeHourNum(Time t) const
+    Time toStableRelativeHourNum(Time t) const
    {
        return (t + DATE_LUT_ADD + 86400 - offset_at_start_of_epoch) / 3600 - (DATE_LUT_ADD / 3600);
    }

    template <typename DateOrTime>
-    inline Time toStableRelativeHourNum(DateOrTime v) const
+    Time toStableRelativeHourNum(DateOrTime v) const
    {
        return toStableRelativeHourNum(lut[toLUTIndex(v)].date);
    }

-    inline Time toRelativeMinuteNum(Time t) const /// NOLINT
+    Time toRelativeMinuteNum(Time t) const /// NOLINT
    {
        return (t + DATE_LUT_ADD) / 60 - (DATE_LUT_ADD / 60);
    }

    template <typename DateOrTime>
-    inline Time toRelativeMinuteNum(DateOrTime v) const
+    Time toRelativeMinuteNum(DateOrTime v) const
    {
        return toRelativeMinuteNum(lut[toLUTIndex(v)].date);
    }

    template <typename DateOrTime>
-    inline auto toStartOfYearInterval(DateOrTime v, UInt64 years) const
+    auto toStartOfYearInterval(DateOrTime v, UInt64 years) const
    {
        if (years == 1)
            return toFirstDayNumOfYear(v);
@ -1019,7 +1019,7 @@ public:

    template <typename Date>
    requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
-    inline auto toStartOfQuarterInterval(Date d, UInt64 quarters) const
+    auto toStartOfQuarterInterval(Date d, UInt64 quarters) const
    {
        if (quarters == 1)
            return toFirstDayNumOfQuarter(d);
@ -1028,7 +1028,7 @@ public:

    template <typename Date>
    requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
-    inline auto toStartOfMonthInterval(Date d, UInt64 months) const
+    auto toStartOfMonthInterval(Date d, UInt64 months) const
    {
        if (months == 1)
            return toFirstDayNumOfMonth(d);
@ -1042,7 +1042,7 @@ public:

    template <typename Date>
    requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
-    inline auto toStartOfWeekInterval(Date d, UInt64 weeks) const
+    auto toStartOfWeekInterval(Date d, UInt64 weeks) const
    {
        if (weeks == 1)
            return toFirstDayNumOfWeek(d);
@ -1056,7 +1056,7 @@ public:

    template <typename Date>
    requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
-    inline Time toStartOfDayInterval(Date d, UInt64 days) const
+    Time toStartOfDayInterval(Date d, UInt64 days) const
    {
        if (days == 1)
            return toDate(d);
@ -1152,7 +1152,7 @@ public:
        return static_cast<DateOrTime>(roundDown(t, seconds));
    }

-    inline LUTIndex makeLUTIndex(Int16 year, UInt8 month, UInt8 day_of_month) const
+    LUTIndex makeLUTIndex(Int16 year, UInt8 month, UInt8 day_of_month) const
    {
        if (unlikely(year < DATE_LUT_MIN_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31))
            return LUTIndex(0);
@ -1167,7 +1167,7 @@ public:
    }

    /// Create DayNum from year, month, day of month.
-    inline ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month, Int32 default_error_day_num = 0) const
+    ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month, Int32 default_error_day_num = 0) const
    {
        if (unlikely(year < DATE_LUT_MIN_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31))
            return ExtendedDayNum(default_error_day_num);
@ -1175,14 +1175,14 @@ public:
        return toDayNum(makeLUTIndex(year, month, day_of_month));
    }

-    inline Time makeDate(Int16 year, UInt8 month, UInt8 day_of_month) const
+    Time makeDate(Int16 year, UInt8 month, UInt8 day_of_month) const
    {
        return lut[makeLUTIndex(year, month, day_of_month)].date;
    }

    /** Does not accept daylight saving time as argument: in case of ambiguity, it choose greater timestamp.
      */
-    inline Time makeDateTime(Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) const
+    Time makeDateTime(Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) const
    {
        size_t index = makeLUTIndex(year, month, day_of_month);
        Time time_offset = hour * 3600 + minute * 60 + second;
@ -1194,28 +1194,28 @@ public:
    }

    template <typename DateOrTime>
-    inline const Values & getValues(DateOrTime v) const { return lut[toLUTIndex(v)]; }
+    const Values & getValues(DateOrTime v) const { return lut[toLUTIndex(v)]; }

    template <typename DateOrTime>
-    inline UInt32 toNumYYYYMM(DateOrTime v) const
+    UInt32 toNumYYYYMM(DateOrTime v) const
    {
        const Values & values = getValues(v);
        return values.year * 100 + values.month;
    }

    template <typename DateOrTime>
-    inline UInt32 toNumYYYYMMDD(DateOrTime v) const
+    UInt32 toNumYYYYMMDD(DateOrTime v) const
    {
        const Values & values = getValues(v);
        return values.year * 10000 + values.month * 100 + values.day_of_month;
    }

-    inline Time YYYYMMDDToDate(UInt32 num) const /// NOLINT
+    Time YYYYMMDDToDate(UInt32 num) const /// NOLINT
    {
        return makeDate(num / 10000, num / 100 % 100, num % 100);
    }

-    inline ExtendedDayNum YYYYMMDDToDayNum(UInt32 num) const /// NOLINT
+    ExtendedDayNum YYYYMMDDToDayNum(UInt32 num) const /// NOLINT
    {
        return makeDayNum(num / 10000, num / 100 % 100, num % 100);
    }
@ -1241,13 +1241,13 @@ public:
        TimeComponents time;
    };

-    inline DateComponents toDateComponents(Time t) const
+    DateComponents toDateComponents(Time t) const
    {
        const Values & values = getValues(t);
        return { values.year, values.month, values.day_of_month };
    }

-    inline DateTimeComponents toDateTimeComponents(Time t) const
+    DateTimeComponents toDateTimeComponents(Time t) const
    {
        const LUTIndex index = findIndex(t);
        const Values & values = lut[index];
@ -1283,12 +1283,12 @@ public:
    }

    template <typename DateOrTime>
-    inline DateTimeComponents toDateTimeComponents(DateOrTime v) const
+    DateTimeComponents toDateTimeComponents(DateOrTime v) const
    {
        return toDateTimeComponents(lut[toLUTIndex(v)].date);
    }

-    inline UInt64 toNumYYYYMMDDhhmmss(Time t) const
+    UInt64 toNumYYYYMMDDhhmmss(Time t) const
    {
        DateTimeComponents components = toDateTimeComponents(t);

@ -1301,7 +1301,7 @@ public:
            + UInt64(components.date.year) * 10000000000;
    }

-    inline Time YYYYMMDDhhmmssToTime(UInt64 num) const /// NOLINT
+    Time YYYYMMDDhhmmssToTime(UInt64 num) const /// NOLINT
    {
        return makeDateTime(
            num / 10000000000,
@ -1315,7 +1315,7 @@ public:
    /// Adding calendar intervals.
    /// Implementation specific behaviour when delta is too big.

-    inline NO_SANITIZE_UNDEFINED Time addDays(Time t, Int64 delta) const
+    NO_SANITIZE_UNDEFINED Time addDays(Time t, Int64 delta) const
    {
        const LUTIndex index = findIndex(t);
        const Values & values = lut[index];
@ -1332,12 +1332,12 @@ public:
        return lut[new_index].date + time;
    }

-    inline NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int64 delta) const
+    NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int64 delta) const
    {
        return addDays(t, delta * 7);
    }

-    inline UInt8 saturateDayOfMonth(Int16 year, UInt8 month, UInt8 day_of_month) const
+    UInt8 saturateDayOfMonth(Int16 year, UInt8 month, UInt8 day_of_month) const
    {
        if (likely(day_of_month <= 28))
            return day_of_month;
@ -1351,7 +1351,7 @@ public:
    }

    template <typename DateOrTime>
-    inline LUTIndex NO_SANITIZE_UNDEFINED addMonthsIndex(DateOrTime v, Int64 delta) const
+    LUTIndex NO_SANITIZE_UNDEFINED addMonthsIndex(DateOrTime v, Int64 delta) const
    {
        const Values & values = lut[toLUTIndex(v)];

@ -1375,11 +1375,11 @@ public:
        }
    }

-    /// If resulting month has less deys than source month, then saturation can happen.
+    /// If resulting month has less days than source month, then saturation can happen.
    /// Example: 31 Aug + 1 month = 30 Sep.
    template <typename DateTime>
    requires std::is_same_v<DateTime, UInt32> || std::is_same_v<DateTime, Int64> || std::is_same_v<DateTime, time_t>
-    inline Time NO_SANITIZE_UNDEFINED addMonths(DateTime t, Int64 delta) const
+    Time NO_SANITIZE_UNDEFINED addMonths(DateTime t, Int64 delta) const
    {
        const auto result_day = addMonthsIndex(t, delta);

@ -1405,7 +1405,7 @@ public:

    template <typename Date>
    requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
-    inline auto NO_SANITIZE_UNDEFINED addMonths(Date d, Int64 delta) const
+    auto NO_SANITIZE_UNDEFINED addMonths(Date d, Int64 delta) const
    {
        if constexpr (std::is_same_v<Date, DayNum>)
            return toDayNum(LUTIndexWithSaturation(addMonthsIndex(d, delta)));
@ -1414,13 +1414,13 @@ public:
    }

    template <typename DateOrTime>
-    inline auto NO_SANITIZE_UNDEFINED addQuarters(DateOrTime d, Int64 delta) const
+    auto NO_SANITIZE_UNDEFINED addQuarters(DateOrTime d, Int64 delta) const
    {
        return addMonths(d, delta * 3);
    }

    template <typename DateOrTime>
-    inline LUTIndex NO_SANITIZE_UNDEFINED addYearsIndex(DateOrTime v, Int64 delta) const
+    LUTIndex NO_SANITIZE_UNDEFINED addYearsIndex(DateOrTime v, Int64 delta) const
    {
        const Values & values = lut[toLUTIndex(v)];

@ -1438,7 +1438,7 @@ public:
    /// Saturation can occur if 29 Feb is mapped to non-leap year.
    template <typename DateTime>
    requires std::is_same_v<DateTime, UInt32> || std::is_same_v<DateTime, Int64> || std::is_same_v<DateTime, time_t>
-    inline Time addYears(DateTime t, Int64 delta) const
+    Time addYears(DateTime t, Int64 delta) const
    {
        auto result_day = addYearsIndex(t, delta);

@ -1464,7 +1464,7 @@ public:

    template <typename Date>
    requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
-    inline auto addYears(Date d, Int64 delta) const
+    auto addYears(Date d, Int64 delta) const
    {
        if constexpr (std::is_same_v<Date, DayNum>)
            return toDayNum(LUTIndexWithSaturation(addYearsIndex(d, delta)));
@ -1473,7 +1473,7 @@ public:
    }


-    inline std::string timeToString(Time t) const
+    std::string timeToString(Time t) const
    {
        DateTimeComponents components = toDateTimeComponents(t);

@ -1498,7 +1498,7 @@ public:
        return s;
    }

-    inline std::string dateToString(Time t) const
+    std::string dateToString(Time t) const
    {
        const Values & values = getValues(t);

@ -1516,7 +1516,7 @@ public:
        return s;
    }

-    inline std::string dateToString(ExtendedDayNum d) const
+    std::string dateToString(ExtendedDayNum d) const
    {
        const Values & values = getValues(d);

--- a/src/Common/FileRenamer.cpp
+++ b/src/Common/FileRenamer.cpp
@ -7,11 +7,19 @@
 #include <filesystem>
 #include <format>
 #include <map>
-#include <re2/re2.h>

 #include <boost/algorithm/string.hpp>
 #include <boost/algorithm/string/replace.hpp>

+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+#include <re2/re2.h>
+#ifdef __clang__
+#  pragma clang diagnostic pop
+#endif
+
 namespace fs = std::filesystem;

 namespace DB
--- a/src/Common/HTTPHeaderFilter.cpp
+++ b/src/Common/HTTPHeaderFilter.cpp
@ -2,7 +2,14 @@
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/Exception.h>

+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
 #include <re2/re2.h>
+#ifdef __clang__
+#  pragma clang diagnostic pop
+#endif

 namespace DB
 {
--- a/src/Common/HashTable/HashTableAllocator.h
+++ b/src/Common/HashTable/HashTableAllocator.h
@ -8,7 +8,7 @@
  * table, so it makes sense to pre-fault the pages so that page faults don't
  * interrupt the resize loop. Set the allocator parameter accordingly.
  */
-using HashTableAllocator = Allocator<true /* clear_memory */, true /* mmap_populate */>;
+using HashTableAllocator = Allocator<true /* clear_memory */, true /* populate */>;

 template <size_t initial_bytes = 64>
 using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory<HashTableAllocator, initial_bytes>;
--- a/src/Common/JSONParsers/SimdJSONParser.h
+++ b/src/Common/JSONParsers/SimdJSONParser.h
@ -8,6 +8,9 @@
 #    include <base/defines.h>
 #    include <simdjson.h>
 #    include "ElementTypes.h"
+#    include <Common/PODArray_fwd.h>
+#    include <Common/PODArray.h>
+#    include <charconv>

 namespace DB
 {
@ -16,6 +19,254 @@ namespace ErrorCodes
    extern const int CANNOT_ALLOCATE_MEMORY;
 }

+/// Formats values of basic JSON types (atoms, numbers, strings, punctuation) into a byte buffer.
+/// The original implementation is mini_formatter in simdjson.h, but it is not a public API, so we
+/// add an implementation here.
+class SimdJSONBasicFormatter
+{
+public:
+    explicit SimdJSONBasicFormatter(PaddedPODArray<UInt8> & buffer_) : buffer(buffer_) {}
+    inline void comma() { oneChar(','); }
+    /** Start an array, prints [ **/
+    inline void startArray() { oneChar('['); }
+    /** End an array, prints ] **/
+    inline void endArray() { oneChar(']'); }
+    /** Start an object, prints { **/
+    inline void startObject() { oneChar('{'); }
+    /** End an object, prints } **/
+    inline void endObject() { oneChar('}'); }
+    /** Prints a true **/
+    inline void trueAtom()
+    {
+        const char * s = "true";
+        buffer.insert(s, s + 4);
+    }
+    /** Prints a false **/
+    inline void falseAtom()
+    {
+        const char * s = "false";
+        buffer.insert(s, s + 5);
+    }
+    /** Prints a null **/
+    inline void nullAtom()
+    {
+        const char * s = "null";
+        buffer.insert(s, s + 4);
+    }
+    /** Prints a signed 64-bit integer **/
+    inline void number(int64_t x)
+    {
+        char number_buffer[24];
+        auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
+        buffer.insert(number_buffer, res.ptr);
+    }
+    /** Prints an unsigned 64-bit integer **/
+    inline void number(uint64_t x)
+    {
+        char number_buffer[24];
+        auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
+        buffer.insert(number_buffer, res.ptr);
+    }
+    /** Prints a double-precision floating-point number **/
+    inline void number(double x)
+    {
+        char number_buffer[24];
+        auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
+        buffer.insert(number_buffer, res.ptr);
+    }
+    /** Prints a key (string + colon) **/
+    inline void key(std::string_view unescaped)
+    {
+        string(unescaped);
+        oneChar(':');
+    }
+    /** Prints a string. The string is escaped as needed. **/
+    inline void string(std::string_view unescaped)
+    {
+        oneChar('\"');
+        size_t i = 0;
+        // Fast path for the case where we have no control character, no ", and no backslash.
+        // This should include most keys.
+        //
+        // We would like to use 'bool' but some compilers take offense to bitwise operation
+        // with bool types.
+        constexpr static char needs_escaping[] = {
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+        for (; i + 8 <= unescaped.length(); i += 8)
+        {
+            // Poor man's vectorization. This could get much faster if we used SIMD.
+            //
+            // It is not the case that replacing '|' with '||' would be neutral performance-wise.
+            if (needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i + 1])]
+                | needs_escaping[uint8_t(unescaped[i + 2])] | needs_escaping[uint8_t(unescaped[i + 3])]
+                | needs_escaping[uint8_t(unescaped[i + 4])] | needs_escaping[uint8_t(unescaped[i + 5])]
+                | needs_escaping[uint8_t(unescaped[i + 6])] | needs_escaping[uint8_t(unescaped[i + 7])])
+            {
+                break;
+            }
+        }
+        for (; i < unescaped.length(); i++)
+        {
+            if (needs_escaping[uint8_t(unescaped[i])])
+            {
+                break;
+            }
+        }
+        // The following is also possible and omits a 256-byte table, but it is slower:
+        // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
+        //      && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}
+
+        // At least for long strings, the following should be fast. We could
+        // do better by integrating the checks and the insertion.
+        buffer.insert(unescaped.data(), unescaped.data() + i);
+        // We found a character that needs escaping if we enter this loop (slow).
+        // Note that we do not restart from the beginning, but rather we continue
+        // from the point where we encountered something that requires escaping.
+        for (; i < unescaped.length(); i++)
+        {
+            switch (unescaped[i])
+            {
+                case '\"': {
+                    const char * s = "\\\"";
+                    buffer.insert(s, s + 2);
+                }
+                break;
+                case '\\': {
+                    const char * s = "\\\\";
+                    buffer.insert(s, s + 2);
+                }
+                break;
+                default:
+                    if (uint8_t(unescaped[i]) <= 0x1F)
+                    {
+                        // If packed, this uses 8 * 32 bytes.
+                        // Note that we expect most compilers to embed this code in the data
+                        // section.
+                        constexpr static simdjson::escape_sequence escaped[32] = {
+                            {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"},
+                            {6, "\\u0007"}, {2, "\\b"},     {2, "\\t"},     {2, "\\n"},     {6, "\\u000b"}, {2, "\\f"},     {2, "\\r"},
+                            {6, "\\u000e"}, {6, "\\u000f"}, {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, {6, "\\u0014"},
+                            {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"},
+                            {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}};
+                        auto u = escaped[uint8_t(unescaped[i])];
+                        buffer.insert(u.string, u.string + u.length);
+                    }
+                    else
+                    {
+                        oneChar(unescaped[i]);
+                    }
+            } // switch
+        } // for
+        oneChar('\"');
+    }
+
+    inline void oneChar(char c)
+    {
+        buffer.push_back(c);
+    }
+private:
+    PaddedPODArray<UInt8> & buffer;
+
+};
+
+
+/// Formats simdjson DOM values (element, array, object, key-value pair) into the given buffer.
+/// Similar to string_builder in simdjson.h.
+class SimdJSONElementFormatter
+{
+public:
+    explicit SimdJSONElementFormatter(PaddedPODArray<UInt8> & buffer_) : format(buffer_) {}
+    /** Append an element, dispatching on its type: scalars are printed directly, arrays and objects recurse **/
+    inline void append(simdjson::dom::element value)
+    {
+        switch (value.type())
+        {
+            case simdjson::dom::element_type::UINT64: {
+                format.number(value.get_uint64().value_unsafe());
+                break;
+            }
+            case simdjson::dom::element_type::INT64: {
+                format.number(value.get_int64().value_unsafe());
+                break;
+            }
+            case simdjson::dom::element_type::DOUBLE: {
+                format.number(value.get_double().value_unsafe());
+                break;
+            }
+            case simdjson::dom::element_type::STRING: {
+                format.string(value.get_string().value_unsafe());
+                break;
+            }
+            case simdjson::dom::element_type::BOOL: {
+                if (value.get_bool().value_unsafe())
+                    format.trueAtom();
+                else
+                    format.falseAtom();
+                break;
+            }
+            case simdjson::dom::element_type::NULL_VALUE: {
+                format.nullAtom();
+                break;
+            }
+            case simdjson::dom::element_type::ARRAY: {
+                append(value.get_array().value_unsafe());
+                break;
+            }
+            case simdjson::dom::element_type::OBJECT: {
+                append(value.get_object().value_unsafe());
+                break;
+            }
+        }
+    }
+    /** Append an array: prints [, the comma-separated elements, then ] **/
+    inline void append(simdjson::dom::array value)
+    {
+        format.startArray();
+        auto iter = value.begin();
+        auto end = value.end();
+        if (iter != end)
+        {
+            append(*iter);
+            for (++iter; iter != end; ++iter)
+            {
+                format.comma();
+                append(*iter);
+            }
+        }
+        format.endArray();
+    }
+
+    inline void append(simdjson::dom::object value)
+    {
+        format.startObject();
+        auto pair = value.begin();
+        auto end = value.end();
+        if (pair != end)
+        {
+            append(*pair);
+            for (++pair; pair != end; ++pair)
+            {
+                format.comma();
+                append(*pair);
+            }
+        }
+        format.endObject();
+    }
+
+    inline void append(simdjson::dom::key_value_pair kv)
+    {
+        format.key(kv.key);
+        append(kv.value);
+    }
+private:
+    SimdJSONBasicFormatter format;
+};
+
 /// This class can be used as an argument for the template class FunctionJSON.
 /// It provides ability to parse JSONs using simdjson library.
 struct SimdJSONParser
--- a/src/Common/OptimizedRegularExpression.cpp
+++ b/src/Common/OptimizedRegularExpression.cpp
@ -441,8 +441,7 @@ finish:
 }
 }

-template <bool thread_safe>
-void OptimizedRegularExpressionImpl<thread_safe>::analyze(
+void OptimizedRegularExpression::analyze(
        std::string_view regexp_,
        std::string & required_substring,
        bool & is_trivial,
@ -467,8 +466,7 @@ catch (...)
    LOG_ERROR(&Poco::Logger::get("OptimizeRegularExpression"), "Analyze RegularExpression failed, got error: {}", DB::getCurrentExceptionMessage(false));
 }

-template <bool thread_safe>
-OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
+OptimizedRegularExpression::OptimizedRegularExpression(const std::string & regexp_, int options)
 {
    std::vector<std::string> alternatives_dummy; /// this vector extracts patterns a,b,c from pattern (a|b|c). for now it's not used.
    analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix, alternatives_dummy);
@ -486,7 +484,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(cons
    if (!is_trivial)
    {
        /// Compile the re2 regular expression.
-        typename RegexType::Options regexp_options;
+        typename re2::RE2::Options regexp_options;

        /// Never write error messages to stderr. It's ignorant to do it from library code.
        regexp_options.set_log_errors(false);
@ -497,7 +495,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(cons
        if (is_dot_nl)
            regexp_options.set_dot_nl(true);

-        re2 = std::make_unique<RegexType>(regexp_, regexp_options);
+        re2 = std::make_unique<re2::RE2>(regexp_, regexp_options);
        if (!re2->ok())
        {
            throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP,
@ -527,8 +525,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(cons
    }
 }

-template <bool thread_safe>
-OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(OptimizedRegularExpressionImpl && rhs) noexcept
+OptimizedRegularExpression::OptimizedRegularExpression(OptimizedRegularExpression && rhs) noexcept
    : is_trivial(rhs.is_trivial)
    , required_substring_is_prefix(rhs.required_substring_is_prefix)
    , is_case_insensitive(rhs.is_case_insensitive)
@ -545,8 +542,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(Opti
    }
 }

-template <bool thread_safe>
-bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size) const
+bool OptimizedRegularExpression::match(const char * subject, size_t subject_size) const
 {
    const UInt8 * haystack = reinterpret_cast<const UInt8 *>(subject);
    const UInt8 * haystack_end = haystack + subject_size;
@ -577,13 +573,12 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si
            }
        }

-        return re2->Match({subject, subject_size}, 0, subject_size, RegexType::UNANCHORED, nullptr, 0);
+        return re2->Match({subject, subject_size}, 0, subject_size, re2::RE2::UNANCHORED, nullptr, 0);
    }
 }


-template <bool thread_safe>
-bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, Match & match) const
+bool OptimizedRegularExpression::match(const char * subject, size_t subject_size, Match & match) const
 {
    const UInt8 * haystack = reinterpret_cast<const UInt8 *>(subject);
    const UInt8 * haystack_end = haystack + subject_size;
@ -624,7 +619,7 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si

        std::string_view piece;

-        if (!RegexType::PartialMatch({subject, subject_size}, *re2, &piece))
+        if (!re2::RE2::PartialMatch({subject, subject_size}, *re2, &piece))
            return false;
        else
        {
@ -636,8 +631,7 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si
 }


-template <bool thread_safe>
-unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
+unsigned OptimizedRegularExpression::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
 {
    const UInt8 * haystack = reinterpret_cast<const UInt8 *>(subject);
    const UInt8 * haystack_end = haystack + subject_size;
@ -695,7 +689,7 @@ unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject
            {subject, subject_size},
            0,
            subject_size,
-            RegexType::UNANCHORED,
+            re2::RE2::UNANCHORED,
            pieces.data(),
            static_cast<int>(pieces.size())))
        {
@ -721,6 +715,3 @@ unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject
        }
    }
 }
-
-template class OptimizedRegularExpressionImpl<true>;
-template class OptimizedRegularExpressionImpl<false>;
--- a/src/Common/OptimizedRegularExpression.h
+++ b/src/Common/OptimizedRegularExpression.h
@ -6,9 +6,15 @@
 #include <optional>
 #include <Common/StringSearcher.h>
 #include "config.h"
-#include <re2/re2.h>
-#include <re2_st/re2.h>

+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+#include <re2/re2.h>
+#ifdef __clang__
+#  pragma clang diagnostic pop
+#endif

 /** Uses two ways to optimize a regular expression:
  * 1. If the regular expression is trivial (reduces to finding a substring in a string),
@ -37,8 +43,7 @@ namespace OptimizedRegularExpressionDetails
    };
 }

-template <bool thread_safe>
-class OptimizedRegularExpressionImpl
+class OptimizedRegularExpression
 {
 public:
    enum Options
@ -51,12 +56,10 @@ public:
    using Match = OptimizedRegularExpressionDetails::Match;
    using MatchVec = std::vector<Match>;

-    using RegexType = std::conditional_t<thread_safe, re2::RE2, re2_st::RE2>;
-
-    OptimizedRegularExpressionImpl(const std::string & regexp_, int options = 0); /// NOLINT
+    OptimizedRegularExpression(const std::string & regexp_, int options = 0); /// NOLINT
    /// StringSearcher store pointers to required_substring, it must be updated on move.
-    OptimizedRegularExpressionImpl(OptimizedRegularExpressionImpl && rhs) noexcept;
-    OptimizedRegularExpressionImpl(const OptimizedRegularExpressionImpl & rhs) = delete;
+    OptimizedRegularExpression(OptimizedRegularExpression && rhs) noexcept;
+    OptimizedRegularExpression(const OptimizedRegularExpression & rhs) = delete;

    bool match(const std::string & subject) const
    {
@ -85,7 +88,7 @@ public:
    unsigned getNumberOfSubpatterns() const { return number_of_subpatterns; }

    /// Get the regexp re2 or nullptr if the pattern is trivial (for output to the log).
-    const std::unique_ptr<RegexType> & getRE2() const { return re2; }
+    const std::unique_ptr<re2::RE2> & getRE2() const { return re2; }

    void getAnalyzeResult(std::string & out_required_substring, bool & out_is_trivial, bool & out_required_substring_is_prefix) const
    {
@ -110,9 +113,6 @@ private:
    std::string required_substring;
    std::optional<DB::ASCIICaseSensitiveStringSearcher> case_sensitive_substring_searcher;
    std::optional<DB::ASCIICaseInsensitiveStringSearcher> case_insensitive_substring_searcher;
-    std::unique_ptr<RegexType> re2;
+    std::unique_ptr<re2::RE2> re2;
    unsigned number_of_subpatterns;
 };
-
-using OptimizedRegularExpression = OptimizedRegularExpressionImpl<true>;
-using OptimizedRegularExpressionSingleThreaded = OptimizedRegularExpressionImpl<false>;
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@ -253,6 +253,13 @@ The server successfully detected this situation and will download merged part fr
    M(PolygonsAddedToPool, "A polygon has been added to the cache (pool) for the 'pointInPolygon' function.") \
    M(PolygonsInPoolAllocatedBytes, "The number of bytes for polygons added to the cache (pool) for the 'pointInPolygon' function.") \
    \
+    M(USearchAddCount, "Number of vectors added to usearch indexes.") \
+    M(USearchAddVisitedMembers, "Number of nodes visited when adding vectors to usearch indexes.") \
+    M(USearchAddComputedDistances, "Number of times distance was computed when adding vectors to usearch indexes.") \
+    M(USearchSearchCount, "Number of search operations performed in usearch indexes.") \
+    M(USearchSearchVisitedMembers, "Number of nodes visited when searching in usearch indexes.") \
+    M(USearchSearchComputedDistances, "Number of times distance was computed when searching usearch indexes.") \
+    \
    M(RWLockAcquiredReadLocks, "Number of times a read lock was acquired (in a heavy RWLock).") \
    M(RWLockAcquiredWriteLocks, "Number of times a write lock was acquired (in a heavy RWLock).") \
    M(RWLockReadersWaitMilliseconds, "Total time spent waiting for a read lock to be acquired (in a heavy RWLock).") \
--- a/src/Common/RemoteHostFilter.cpp
+++ b/src/Common/RemoteHostFilter.cpp
@ -1,4 +1,3 @@
-#include <re2/re2.h>
 #include <Poco/URI.h>
 #include <Poco/Util/AbstractConfiguration.h>
 #include <Common/RemoteHostFilter.h>
@ -6,6 +5,14 @@
 #include <Common/Exception.h>
 #include <IO/WriteHelpers.h>

+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+#include <re2/re2.h>
+#ifdef __clang__
+#  pragma clang diagnostic pop
+#endif

 namespace DB
 {
--- a/src/Common/SensitiveDataMasker.cpp
+++ b/src/Common/SensitiveDataMasker.cpp
@ -4,7 +4,14 @@
 #include <string>
 #include <atomic>

+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
 #include <re2/re2.h>
+#ifdef __clang__
+#  pragma clang diagnostic pop
+#endif

 #include <Poco/Util/AbstractConfiguration.h>

--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@ -1466,7 +1466,7 @@ void validateZooKeeperConfig(const Poco::Util::AbstractConfiguration & config)

 bool hasZooKeeperConfig(const Poco::Util::AbstractConfiguration & config)
 { /// True if config has "zookeeper" or "keeper", or has "keeper_server.raft_configuration" while "keeper_server.use_cluster" (read with fallback `true`) is not false.
-    return config.has("zookeeper") || config.has("keeper") || (config.has("keeper_server") && config.getBool("keeper_server.use_cluster", true));
+    return config.has("zookeeper") || config.has("keeper") || (config.has("keeper_server.raft_configuration") && config.getBool("keeper_server.use_cluster", true));
 }

 String getZooKeeperConfigName(const Poco::Util::AbstractConfiguration & config)
@ -1477,7 +1477,7 @@ String getZooKeeperConfigName(const Poco::Util::AbstractConfiguration & config)
    if (config.has("keeper"))
        return "keeper";

-    if (config.has("keeper_server") && config.getBool("keeper_server.use_cluster", true))
+    if (config.has("keeper_server.raft_configuration") && config.getBool("keeper_server.use_cluster", true))
        return "keeper_server";

    throw DB::Exception(DB::ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no Zookeeper configuration in server config");
--- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp
@ -701,7 +701,7 @@ void ZooKeeper::receiveThread()

            if (in->poll(max_wait_us))
            {
-                if (requests_queue.isFinished())
+                if (finalization_started.test())
                    break;

                receiveEvent();
--- a/src/Common/parseGlobs.cpp
+++ b/src/Common/parseGlobs.cpp
@ -2,11 +2,18 @@
 #include <IO/WriteBufferFromString.h>
 #include <IO/ReadBufferFromString.h>
 #include <IO/Operators.h>
-#include <re2/re2.h>
 #include <algorithm>
 #include <sstream>
 #include <iomanip>

+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+#include <re2/re2.h>
+#ifdef __clang__
+#  pragma clang diagnostic pop
+#endif

 namespace DB
 {
--- a/src/Common/tests/gtest_makeRegexpPatternFromGlobs.cpp
+++ b/src/Common/tests/gtest_makeRegexpPatternFromGlobs.cpp
@ -1,7 +1,14 @@
 #include <Common/parseGlobs.h>
-#include <re2/re2.h>
 #include <gtest/gtest.h>

+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+#include <re2/re2.h>
+#ifdef __clang__
+#  pragma clang diagnostic pop
+#endif

 using namespace DB;

--- a/src/Coordination/Changelog.cpp
+++ b/src/Coordination/Changelog.cpp
@ -586,13 +586,15 @@ private:
    std::unique_ptr<ReadBuffer> read_buf;
 };

-Changelog::Changelog(Poco::Logger * log_, LogFileSettings log_file_settings, KeeperContextPtr keeper_context_)
+Changelog::Changelog(
+    Poco::Logger * log_, LogFileSettings log_file_settings, FlushSettings flush_settings_, KeeperContextPtr keeper_context_)
    : changelogs_detached_dir("detached")
    , rotate_interval(log_file_settings.rotate_interval)
    , log(log_)
    , write_operations(std::numeric_limits<size_t>::max())
    , append_completion_queue(std::numeric_limits<size_t>::max())
    , keeper_context(std::move(keeper_context_))
+    , flush_settings(flush_settings_)
 {
    if (auto latest_log_disk = getLatestLogDisk();
        log_file_settings.force_sync && dynamic_cast<const DiskLocal *>(latest_log_disk.get()) == nullptr)
@ -1014,8 +1016,65 @@ void Changelog::writeThread()
 {
    WriteOperation write_operation;
    bool batch_append_ok = true;
-    while (write_operations.pop(write_operation))
+    size_t pending_appends = 0;
+    bool try_batch_flush = false;
+
+    const auto flush_logs = [&](const auto & flush) /// Flushes current_writer, stores flush.index as last_durable_idx and resets pending_appends.
    {
+        LOG_TEST(log, "Flushing {} logs", pending_appends);
+
+        {
+            std::lock_guard writer_lock(writer_mutex);
+            current_writer->flush(); /// performed while holding writer_mutex
+        }
+
+        {
+            std::lock_guard lock{durable_idx_mutex};
+            last_durable_idx = flush.index; /// updated under durable_idx_mutex; durable_idx_cv is notified separately, not here
+        }
+
+        pending_appends = 0; /// start counting appends for the next flush from zero
+    };
+
+    const auto notify_append_completion = [&] /// Notifies durable_idx_cv and pushes the current batch_append_ok value to append_completion_queue.
+    {
+        durable_idx_cv.notify_all();
+
+        // we need to call completion callback in another thread because it takes a global lock for the NuRaft server
+        // NuRaft will in some places wait for flush to be done while having the same global lock leading to deadlock
+        // -> future write operations are blocked by flush that cannot be completed because it cannot take NuRaft lock
+        // -> NuRaft won't leave lock until its flush is done
+        if (!append_completion_queue.push(batch_append_ok))
+            LOG_WARNING(log, "Changelog is shut down"); /// reached only when push() returned false
+    };
+
+    /// NuRaft writes a batch of requests by first issuing multiple store requests, i.e. AppendLog,
+    /// followed by a flush request.
+    /// We assume that after some number of appends, we always get a flush request.
+    while (true)
+    {
+        if (try_batch_flush)
+        {
+            try_batch_flush = false;
+            /// we have Flush request stored in write operation
+            /// but we try to get new append operations
+            /// if there are none, we apply the currently set Flush
+            chassert(std::holds_alternative<Flush>(write_operation));
+            if (!write_operations.tryPop(write_operation))
+            {
+                chassert(batch_append_ok);
+                const auto & flush = std::get<Flush>(write_operation);
+                flush_logs(flush);
+                notify_append_completion();
+                if (!write_operations.pop(write_operation))
+                    break;
+            }
+        }
+        else if (!write_operations.pop(write_operation))
+        {
+            break;
+        }
+
        assert(initialized);

        if (auto * append_log = std::get_if<AppendLog>(&write_operation))
@ -1027,6 +1086,7 @@ void Changelog::writeThread()
            assert(current_writer);

            batch_append_ok = current_writer->appendRecord(buildRecord(append_log->index, append_log->log_entry));
+            ++pending_appends;
        }
        else
        {
@ -1034,30 +1094,21 @@ void Changelog::writeThread()

            if (batch_append_ok)
            {
+                /// we can try batching more logs for flush
+                if (pending_appends < flush_settings.max_flush_batch_size)
                {
-                    std::lock_guard writer_lock(writer_mutex);
-                    current_writer->flush();
-                }
-
-                {
-                    std::lock_guard lock{durable_idx_mutex};
-                    last_durable_idx = flush.index;
+                    try_batch_flush = true;
+                    continue;
                }
+                /// we need to flush because we have maximum allowed pending records
+                flush_logs(flush);
            }
            else
            {
+                std::lock_guard lock{durable_idx_mutex};
                *flush.failed = true;
            }
-
-            durable_idx_cv.notify_all();
-
-            // we need to call completion callback in another thread because it takes a global lock for the NuRaft server
-            // NuRaft will in some places wait for flush to be done while having the same global lock leading to deadlock
-            // -> future write operations are blocked by flush that cannot be completed because it cannot take NuRaft lock
-            // -> NuRaft won't leave lock until its flush is done
-            if (!append_completion_queue.push(batch_append_ok))
-                LOG_WARNING(log, "Changelog is shut down");
-
+            notify_append_completion();
            batch_append_ok = true;
        }
    }
--- a/src/Coordination/Changelog.h
+++ b/src/Coordination/Changelog.h
@ -82,6 +82,11 @@ struct LogFileSettings
    uint64_t overallocate_size = 0;
 };

+struct FlushSettings /// Settings passed to the Changelog constructor alongside LogFileSettings.
+{
+    uint64_t max_flush_batch_size = 1000; /// Changelog::writeThread compares its count of pending appends against this: below it, a Flush request may be deferred to batch more appends; otherwise the flush is applied right away.
+};
+
 /// Simplest changelog with files rotation.
 /// No compression, no metadata, just entries with headers one by one.
 /// Able to read broken files/entries and discard them. Not thread safe.
@ -91,6 +96,7 @@ public:
    Changelog(
        Poco::Logger * log_,
        LogFileSettings log_file_settings,
+        FlushSettings flush_settings,
        KeeperContextPtr keeper_context_);

    Changelog(Changelog &&) = delete;
@ -229,6 +235,8 @@ private:

    KeeperContextPtr keeper_context;

+    const FlushSettings flush_settings;
+
    bool initialized = false;
 };

--- a/src/Coordination/CoordinationSettings.cpp
+++ b/src/Coordination/CoordinationSettings.cpp
@ -134,6 +134,8 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const
    write_int(coordination_settings->max_requests_batch_size);
    writeText("max_requests_batch_bytes_size=", buf);
    write_int(coordination_settings->max_requests_batch_bytes_size);
+    writeText("max_flush_batch_size=", buf);
+    write_int(coordination_settings->max_flush_batch_size);
    writeText("max_request_queue_size=", buf);
    write_int(coordination_settings->max_request_queue_size);
    writeText("max_requests_quick_batch_size=", buf);
@ -152,6 +154,9 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const

    writeText("raft_limits_reconnect_limit=", buf);
    write_int(static_cast<uint64_t>(coordination_settings->raft_limits_reconnect_limit));
+
+    writeText("async_replication=", buf);
+    write_bool(coordination_settings->async_replication);
 }

 KeeperConfigurationAndSettingsPtr
--- a/Show More
+++ b/Show More