Merge remote-tracking branch 'upstream/master' into better-tests-for-data-lakes

2024-09-20 08:40:50 +00:00 · 2023-03-28 15:39:24 +02:00 · 2023-03-28 15:39:24 +02:00 · 04b28bf822
commit 04b28bf822
parent 36cc6fee51 21fdb67e03
955 changed files with 23555 additions and 10642 deletions
--- a/.clang-tidy
+++ b/.clang-tidy
@ -41,6 +41,8 @@ Checks: '*,
    -clang-analyzer-security.insecureAPI.strcpy,

    -cppcoreguidelines-avoid-c-arrays,
+    -cppcoreguidelines-avoid-const-or-ref-data-members,
+    -cppcoreguidelines-avoid-do-while,
    -cppcoreguidelines-avoid-goto,
    -cppcoreguidelines-avoid-magic-numbers,
    -cppcoreguidelines-avoid-non-const-global-variables,
@ -128,6 +130,7 @@ Checks: '*,
    -portability-simd-intrinsics,

    -readability-braces-around-statements,
+    -readability-convert-member-functions-to-static,
    -readability-else-after-return,
    -readability-function-cognitive-complexity,
    -readability-function-size,
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@ -9,8 +9,22 @@ on: # yamllint disable-line rule:truthy
    branches:
      - 'backport/**'
 jobs:
+  CheckLabels:
+    runs-on: [self-hosted, style-checker]
+    # Run the first check always, even if the CI is cancelled
+    if: ${{ always() }}
+    steps:
+      - name: Check out repository code
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+      - name: Labels check
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 run_check.py
  PythonUnitTests:
    runs-on: [self-hosted, style-checker]
+    needs: CheckLabels
    steps:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
@ -22,6 +36,7 @@ jobs:
          python3 -m unittest discover -s . -p '*_test.py'
  DockerHubPushAarch64:
    runs-on: [self-hosted, style-checker-aarch64]
+    needs: CheckLabels
    steps:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
@ -38,6 +53,7 @@ jobs:
          path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
  DockerHubPushAmd64:
    runs-on: [self-hosted, style-checker]
+    needs: CheckLabels
    steps:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -121,6 +121,7 @@ if (ENABLE_COLORED_BUILD AND CMAKE_GENERATOR STREQUAL "Ninja")
    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always")
    # ... such manually setting of flags can be removed once CMake supports a variable to
    # activate colors in *all* build systems: https://gitlab.kitware.com/cmake/cmake/-/issues/15502
+    # --> available since CMake 3.24: https://stackoverflow.com/a/73349744
 endif ()

 include (cmake/check_flags.cmake)
@ -134,24 +135,15 @@ if (COMPILER_CLANG)
        set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges")
    endif ()

-    if (HAS_USE_CTOR_HOMING)
-        # For more info see https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/
-        if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO")
-            set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing")
-            set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing")
-        endif()
+    # See https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/
+    if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO")
+        set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing")
+        set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing")
    endif()

    no_warning(enum-constexpr-conversion) # breaks Protobuf in clang-16
 endif ()

-# If compiler has support for -Wreserved-identifier. It is difficult to detect by clang version,
-# because there are two different branches of clang: clang and AppleClang.
-# (AppleClang is not supported by ClickHouse, but some developers have misfortune to use it).
-if (HAS_RESERVED_IDENTIFIER)
-    add_compile_definitions (HAS_RESERVED_IDENTIFIER)
-endif ()
-
 option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON)
 option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF)
 option(ENABLE_BENCHMARKS "Build all benchmark programs in 'benchmarks' subdirectories" OFF)
@ -188,7 +180,6 @@ if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
    # Can be lld or ld-lld or lld-13 or /path/to/lld.
    if (LINKER_NAME MATCHES "lld" AND OS_LINUX)
        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
-        set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index")
        message (STATUS "Adding .gdb-index via --gdb-index linker option.")
    endif ()
 endif()
@ -297,6 +288,7 @@ set (CMAKE_C_STANDARD_REQUIRED ON)

 if (COMPILER_GCC OR COMPILER_CLANG)
    # Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure.
+    # See https://reviews.llvm.org/D112921
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation")
 endif ()

@ -315,11 +307,7 @@ if (ARCH_AMD64)
        set(BRANCHES_WITHIN_32B_BOUNDARIES "-Wa,${BRANCHES_WITHIN_32B_BOUNDARIES}")
    endif()

-    include(CheckCXXCompilerFlag)
-    check_cxx_compiler_flag("${BRANCHES_WITHIN_32B_BOUNDARIES}" HAS_BRANCHES_WITHIN_32B_BOUNDARIES)
-    if (HAS_BRANCHES_WITHIN_32B_BOUNDARIES)
-        set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}")
-    endif()
+    set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}")
 endif()

 if (COMPILER_GCC)
@ -361,7 +349,17 @@ set (CMAKE_ASM_FLAGS_DEBUG               "${CMAKE_ASM_FLAGS_DEBUG} -O0 ${DEBUG_I
 if (COMPILER_CLANG)
    if (OS_DARWIN)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
+
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main")
+
+        # The LLVM MachO linker (ld64.lld, used in native builds) generates by default unwind info in 'compact' format which the internal
+        # unwinder doesn't support and the server will not come up ('invalid compact unwind encoding'). Disable it. You will see a warning
+        # during the build: "ld64.lld: warning: Option `-no_compact_unwind' is undocumented. Should lld implement it?". Yes, ld64.lld does
+        # not document the option, likely for compatibility with Apple's system ld, on which ld64.lld is modeled and which also does not
+        # document it.
+        if (NOT CMAKE_CROSSCOMPILING)
+            set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no_compact_unwind")
+        endif ()
    endif()

    # Display absolute paths in error messages. Otherwise KDevelop fails to navigate to correct file and opens a new file instead.
@ -431,6 +429,7 @@ option(WERROR "Enable -Werror compiler option" ON)
 if (WERROR)
    # Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks.
    # Instead, adopt modern cmake usage requirement.
+    # TODO: Set CMAKE_COMPILE_WARNING_AS_ERROR (cmake 3.24)
    target_compile_options(global-group INTERFACE "-Werror")
 endif ()

--- a/PreLoad.cmake
+++ b/PreLoad.cmake
@ -19,8 +19,8 @@ endif()
 if (NOT "$ENV{CFLAGS}" STREQUAL ""
    OR NOT "$ENV{CXXFLAGS}" STREQUAL ""
    OR NOT "$ENV{LDFLAGS}" STREQUAL ""
-    OR CMAKE_C_FLAGS OR CMAKE_CXX_FLAGS OR CMAKE_EXE_LINKER_FLAGS OR CMAKE_SHARED_LINKER_FLAGS OR CMAKE_MODULE_LINKER_FLAGS
-    OR CMAKE_C_FLAGS_INIT OR CMAKE_CXX_FLAGS_INIT OR CMAKE_EXE_LINKER_FLAGS_INIT OR CMAKE_SHARED_LINKER_FLAGS_INIT OR CMAKE_MODULE_LINKER_FLAGS_INIT)
+    OR CMAKE_C_FLAGS OR CMAKE_CXX_FLAGS OR CMAKE_EXE_LINKER_FLAGS OR CMAKE_MODULE_LINKER_FLAGS
+    OR CMAKE_C_FLAGS_INIT OR CMAKE_CXX_FLAGS_INIT OR CMAKE_EXE_LINKER_FLAGS_INIT OR CMAKE_MODULE_LINKER_FLAGS_INIT)

    # if $ENV
    message("CFLAGS: $ENV{CFLAGS}")
@ -36,7 +36,6 @@ if (NOT "$ENV{CFLAGS}" STREQUAL ""
    message("CMAKE_C_FLAGS_INIT: ${CMAKE_C_FLAGS_INIT}")
    message("CMAKE_CXX_FLAGS_INIT: ${CMAKE_CXX_FLAGS_INIT}")
    message("CMAKE_EXE_LINKER_FLAGS_INIT: ${CMAKE_EXE_LINKER_FLAGS_INIT}")
-    message("CMAKE_SHARED_LINKER_FLAGS_INIT: ${CMAKE_SHARED_LINKER_FLAGS_INIT}")
    message("CMAKE_MODULE_LINKER_FLAGS_INIT: ${CMAKE_MODULE_LINKER_FLAGS_INIT}")

    message(FATAL_ERROR "
--- a/README.md
+++ b/README.md
@ -21,11 +21,10 @@ curl https://clickhouse.com/ | sh
 * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.

 ## Upcoming Events
-* [**v23.2 Release Webinar**](https://clickhouse.com/company/events/v23-2-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-02) - Feb 23 - 23.2 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
-* [**ClickHouse Meetup in Amsterdam**](https://www.meetup.com/clickhouse-netherlands-user-group/events/291485868/) - Mar 9 - The first ClickHouse Amsterdam Meetup of 2023 is here! 🎉 Join us for short lightning talks and long discussions. Food, drinks & good times on us.
-* [**ClickHouse Meetup in SF Bay Area**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/291490121/) - Mar 14 - A night to meet with ClickHouse team in the San Francisco area! Food and drink are a given...but networking is the primary focus.
-* [**ClickHouse Meetup in Austin**](https://www.meetup.com/clickhouse-austin-user-group/events/291486654/) - Mar 16 - The first ClickHouse Meetup in Austin is happening soon! Interested in speaking, let us know!
+* [**ClickHouse Meetup in Austin**](https://www.meetup.com/clickhouse-austin-user-group/events/291486654/) - Mar 30 - The first ClickHouse Meetup in Austin is happening soon! Interested in speaking? Let us know!
+* [**v23.3 Release Webinar**](https://clickhouse.com/company/events/v23-3-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-02) - Mar 30 - 23.3 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.

 ## Recent Recordings
 * **FOSDEM 2023**: In the "Fast and Streaming Data" room Alexey gave a talk entitled "Building Analytical Apps With ClickHouse" that looks at the landscape of data tools, an interesting data set, and how you can interact with data quickly. Check out the recording on **[YouTube](https://www.youtube.com/watch?v=JlcI2Vfz_uk)**.
-* **Recording available**: [**v23.1 Release Webinar**](https://www.youtube.com/watch?v=zYSZXBnTMSE) 23.1 is the ClickHouse New Year release. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. Inverted indices, query cache, and so -- very -- much more.
+* **Recording available**: [**v23.2 Release Webinar**](https://www.youtube.com/watch?v=2o0vRMMIrkY) NTILE Window Function support, Partition Key for GROUP BY, io_uring, Apache Iceberg support, Dynamic Disks, integrations updates! Watch it now!
+* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)
--- a/base/base/coverage.cpp
+++ b/base/base/coverage.cpp
@ -2,6 +2,8 @@

 #if WITH_COVERAGE

+#pragma GCC diagnostic ignored "-Wreserved-identifier"
+
 #    include <mutex>
 #    include <unistd.h>

--- a/base/base/hex.h
+++ b/base/base/hex.h
@ -1,5 +1,6 @@
 #pragma once

+#include <bit>
 #include <cstring>
 #include "types.h"

--- a/base/base/phdr_cache.cpp
+++ b/base/base/phdr_cache.cpp
@ -1,6 +1,4 @@
-#ifdef HAS_RESERVED_IDENTIFIER
 #pragma clang diagnostic ignored "-Wreserved-identifier"
-#endif

 /// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/

--- a/base/base/unit.h
+++ b/base/base/unit.h
@ -5,10 +5,8 @@ constexpr size_t KiB = 1024;
 constexpr size_t MiB = 1024 * KiB;
 constexpr size_t GiB = 1024 * MiB;

-#ifdef HAS_RESERVED_IDENTIFIER
-#  pragma clang diagnostic push
-#  pragma clang diagnostic ignored "-Wreserved-identifier"
-#endif
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreserved-identifier"

 // NOLINTBEGIN(google-runtime-int)
 constexpr size_t operator"" _KiB(unsigned long long val) { return val * KiB; }
@ -16,6 +14,4 @@ constexpr size_t operator"" _MiB(unsigned long long val) { return val * MiB; }
 constexpr size_t operator"" _GiB(unsigned long long val) { return val * GiB; }
 // NOLINTEND(google-runtime-int)

-#ifdef HAS_RESERVED_IDENTIFIER
-#  pragma clang diagnostic pop
-#endif
+#pragma clang diagnostic pop
--- a/base/base/wide_integer_impl.h
+++ b/base/base/wide_integer_impl.h
@ -732,9 +732,10 @@ public:
            if (std::numeric_limits<T>::is_signed && (is_negative(lhs) != is_negative(rhs)))
                return is_negative(rhs);

+            integer<Bits, Signed> t = rhs;
            for (unsigned i = 0; i < item_count; ++i)
            {
-                base_type rhs_item = get_item(rhs, big(i));
+                base_type rhs_item = get_item(t, big(i));

                if (lhs.items[big(i)] != rhs_item)
                    return lhs.items[big(i)] > rhs_item;
@ -757,9 +758,10 @@ public:
            if (std::numeric_limits<T>::is_signed && (is_negative(lhs) != is_negative(rhs)))
                return is_negative(lhs);

+            integer<Bits, Signed> t = rhs;
            for (unsigned i = 0; i < item_count; ++i)
            {
-                base_type rhs_item = get_item(rhs, big(i));
+                base_type rhs_item = get_item(t, big(i));

                if (lhs.items[big(i)] != rhs_item)
                    return lhs.items[big(i)] < rhs_item;
@ -779,9 +781,10 @@ public:
    {
        if constexpr (should_keep_size<T>())
        {
+            integer<Bits, Signed> t = rhs;
            for (unsigned i = 0; i < item_count; ++i)
            {
-                base_type rhs_item = get_item(rhs, any(i));
+                base_type rhs_item = get_item(t, any(i));

                if (lhs.items[any(i)] != rhs_item)
                    return false;
--- a/base/base/wide_integer_to_string.h
+++ b/base/base/wide_integer_to_string.h
@ -64,6 +64,6 @@ struct fmt::formatter<wide::integer<Bits, Signed>>
    template <typename FormatContext>
    auto format(const wide::integer<Bits, Signed> & value, FormatContext & ctx)
    {
-        return format_to(ctx.out(), "{}", to_string(value));
+        return fmt::format_to(ctx.out(), "{}", to_string(value));
    }
 };
--- a/base/poco/Util/src/XMLConfiguration.cpp
+++ b/base/poco/Util/src/XMLConfiguration.cpp
@ -27,7 +27,7 @@
 #include "Poco/Exception.h"
 #include "Poco/NumberParser.h"
 #include "Poco/NumberFormatter.h"
-#include <set>
+#include <unordered_map>


 namespace Poco {
--- a/base/readpassphrase/readpassphrase.c
+++ b/base/readpassphrase/readpassphrase.c
@ -27,9 +27,7 @@
 #define _PATH_TTY "/dev/tty"
 #endif

-#ifdef HAS_RESERVED_IDENTIFIER
 #pragma clang diagnostic ignored "-Wreserved-identifier"
-#endif

 #include <termios.h>
 #include <signal.h>
--- a/cmake/check_flags.cmake
+++ b/cmake/check_flags.cmake
@ -1,7 +1,5 @@
 include (CheckCXXCompilerFlag)
 include (CheckCCompilerFlag)

-check_cxx_compiler_flag("-Wreserved-identifier" HAS_RESERVED_IDENTIFIER)
-check_cxx_compiler_flag("-Wsuggest-destructor-override" HAS_SUGGEST_DESTRUCTOR_OVERRIDE)
-check_cxx_compiler_flag("-Wsuggest-override" HAS_SUGGEST_OVERRIDE)
-check_cxx_compiler_flag("-Xclang -fuse-ctor-homing" HAS_USE_CTOR_HOMING)
+# Set/unset variable based on existence of compiler flags. Example:
+# check_cxx_compiler_flag("-Wreserved-identifier" HAS_RESERVED_IDENTIFIER)
--- a/cmake/clang_tidy.cmake
+++ b/cmake/clang_tidy.cmake
@ -5,14 +5,14 @@ if (ENABLE_CLANG_TIDY)

    find_program (CLANG_TIDY_CACHE_PATH NAMES "clang-tidy-cache")
    if (CLANG_TIDY_CACHE_PATH)
-        find_program (_CLANG_TIDY_PATH NAMES "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12" "clang-tidy")
+        find_program (_CLANG_TIDY_PATH NAMES "clang-tidy-16" "clang-tidy-15" "clang-tidy-14" "clang-tidy")

        # Why do we use ';' here?
        # It's a cmake black magic: https://cmake.org/cmake/help/latest/prop_tgt/LANG_CLANG_TIDY.html#prop_tgt:%3CLANG%3E_CLANG_TIDY
        # The CLANG_TIDY_PATH is passed to CMAKE_CXX_CLANG_TIDY, which follows CXX_CLANG_TIDY syntax.
        set (CLANG_TIDY_PATH "${CLANG_TIDY_CACHE_PATH};${_CLANG_TIDY_PATH}" CACHE STRING "A combined command to run clang-tidy with caching wrapper")
    else ()
-        find_program (CLANG_TIDY_PATH NAMES "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12" "clang-tidy")
+        find_program (CLANG_TIDY_PATH NAMES "clang-tidy-16" "clang-tidy-15" "clang-tidy-14" "clang-tidy")
    endif ()

    if (CLANG_TIDY_PATH)
--- a/cmake/linux/toolchain-riscv64.cmake
+++ b/cmake/linux/toolchain-riscv64.cmake
@ -22,7 +22,6 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
 set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")

 set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=bfd")
-set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=bfd")

 # Currently, lld does not work with the error:
 # ld.lld: error: section size decrease is too large
--- a/cmake/linux/toolchain-x86_64.cmake
+++ b/cmake/linux/toolchain-x86_64.cmake
@ -30,7 +30,6 @@ set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/x86_64-linux-gnu/libc")
 set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
 set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
 set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
-set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
 set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
 set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")

--- a/cmake/tools.cmake
+++ b/cmake/tools.cmake
@ -57,52 +57,46 @@ if (LINKER_NAME MATCHES "gold")
    message (FATAL_ERROR "Linking with gold is unsupported. Please use lld.")
 endif ()

-# s390x doesnt support lld
-if (NOT ARCH_S390X)
-    if (NOT LINKER_NAME)
-        if (COMPILER_GCC)
-            find_program (LLD_PATH NAMES "ld.lld")
-        elseif (COMPILER_CLANG)
-            # llvm lld is a generic driver.
-            # Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld (WebAssembly) instead
-            if (OS_LINUX)
+if (NOT LINKER_NAME)
+    if (COMPILER_GCC)
+        find_program (LLD_PATH NAMES "ld.lld")
+    elseif (COMPILER_CLANG)
+        # llvm lld is a generic driver.
+        # Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld (WebAssembly) instead
+        if (OS_LINUX)
+            if (NOT ARCH_S390X) # s390x doesnt support lld
                find_program (LLD_PATH NAMES "ld.lld-${COMPILER_VERSION_MAJOR}" "ld.lld")
-            elseif (OS_DARWIN)
-                find_program (LLD_PATH NAMES "ld64.lld-${COMPILER_VERSION_MAJOR}" "ld64.lld")
+            endif ()
+        elseif (OS_DARWIN)
+            find_program (LLD_PATH NAMES "ld64.lld-${COMPILER_VERSION_MAJOR}" "ld64.lld")
+        endif ()
+    endif ()
+    if (OS_LINUX OR OS_DARWIN)
+        if (LLD_PATH)
+            if (COMPILER_GCC)
+                # GCC driver requires one of supported linker names like "lld".
+                set (LINKER_NAME "lld")
+            else ()
+                # Clang driver simply allows full linker path.
+                set (LINKER_NAME ${LLD_PATH})
            endif ()
        endif ()
    endif()
 endif()

-if ((OS_LINUX OR OS_DARWIN) AND NOT LINKER_NAME)
-    if (LLD_PATH)
-        if (COMPILER_GCC)
-            # GCC driver requires one of supported linker names like "lld".
-            set (LINKER_NAME "lld")
-        else ()
-            # Clang driver simply allows full linker path.
-            set (LINKER_NAME ${LLD_PATH})
-        endif ()
-    endif ()
-endif ()
-# TODO: allow different linker on != OS_LINUX
-
 if (LINKER_NAME)
+    find_program (LLD_PATH NAMES ${LINKER_NAME})
+    if (NOT LLD_PATH)
+        message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.")
+    endif ()
    if (COMPILER_CLANG)
-        find_program (LLD_PATH NAMES ${LINKER_NAME})
-        if (NOT LLD_PATH)
-            message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.")
-        endif ()
-
-        # This a temporary quirk to emit .debug_aranges with ThinLTO
+        # This is a temporary quirk to emit .debug_aranges with ThinLTO; it can be removed after the upgrade to clang-16
        set (LLD_WRAPPER "${CMAKE_CURRENT_BINARY_DIR}/ld.lld")
        configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY)

        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
-        set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
    else ()
        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
-        set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
    endif ()

 endif ()
--- a/contrib/croaring
+++ b/contrib/croaring
@ -1 +1 @@
-Subproject commit 2c867e9f9c9e2a3a7032791f94c4c7ae3013f6e0
+Subproject commit f40ed52bcdd635840a79877cef4857315dba817c
--- a/contrib/croaring-cmake/CMakeLists.txt
+++ b/contrib/croaring-cmake/CMakeLists.txt
@ -17,7 +17,8 @@ set(SRCS
    "${LIBRARY_DIR}/src/containers/run.c"
    "${LIBRARY_DIR}/src/roaring.c"
    "${LIBRARY_DIR}/src/roaring_priority_queue.c"
-    "${LIBRARY_DIR}/src/roaring_array.c")
+    "${LIBRARY_DIR}/src/roaring_array.c"
+    "${LIBRARY_DIR}/src/memory.c")

 add_library(_roaring ${SRCS})

--- a/contrib/llvm-project
+++ b/contrib/llvm-project
@ -1 +1 @@
-Subproject commit a8bf69e9cd39a23140a2b633c172d201484172da
+Subproject commit 4bfaeb31dd0ef13f025221f93c138974a3e0a22a
--- a/contrib/murmurhash/src/MurmurHash2.cpp
+++ b/contrib/murmurhash/src/MurmurHash2.cpp
@ -31,6 +31,40 @@
 #define BIG_CONSTANT(x) (x##LLU)

 #endif // !defined(_MSC_VER)
+//
+//-----------------------------------------------------------------------------
+// Block read - returns the word at p decoded as little-endian. On little-endian
+// machines this is a single load, while on big-endian or unknown machines the
+// bytes are combined by hand, which should still get optimized into the most efficient instruction.
+static inline uint32_t getblock ( const uint32_t * p )
+{
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+  return *p; // host order is already little-endian: plain load. NOTE(review): callers pass byte pointers cast to uint32_t* - confirm unaligned loads are acceptable
+#else
+  const uint8_t *c = (const uint8_t *)p; // byte view; c[0] becomes the least significant byte
+  return (uint32_t)c[0] |
+	 (uint32_t)c[1] <<  8 |
+	 (uint32_t)c[2] << 16 |
+	 (uint32_t)c[3] << 24;
+#endif
+}
+
+static inline uint64_t getblock ( const uint64_t * p ) // 64-bit overload of the block read above
+{
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+  return *p; // host order is already little-endian: plain load
+#else
+  const uint8_t *c = (const uint8_t *)p; // byte view; c[0] becomes the least significant byte
+  return (uint64_t)c[0] |
+	 (uint64_t)c[1] <<  8 |
+	 (uint64_t)c[2] << 16 |
+	 (uint64_t)c[3] << 24 |
+	 (uint64_t)c[4] << 32 |
+	 (uint64_t)c[5] << 40 |
+	 (uint64_t)c[6] << 48 |
+	 (uint64_t)c[7] << 56;
+#endif
+}

 //-----------------------------------------------------------------------------

@ -52,7 +86,7 @@ uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed )

  while(len >= 4)
  {
-    uint32_t k = *(uint32_t*)data;
+    uint32_t k = getblock((const uint32_t *)data);

    k *= m;
    k ^= k >> r;
@ -105,7 +139,7 @@ uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed )

  while(data != end)
  {
-    uint64_t k = *data++;
+    uint64_t k = getblock(data++);

    k *= m; 
    k ^= k >> r; 
@ -151,12 +185,12 @@ uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed )

  while(len >= 8)
  {
-    uint32_t k1 = *data++;
+    uint32_t k1 = getblock(data++);
    k1 *= m; k1 ^= k1 >> r; k1 *= m;
    h1 *= m; h1 ^= k1;
    len -= 4;

-    uint32_t k2 = *data++;
+    uint32_t k2 = getblock(data++);
    k2 *= m; k2 ^= k2 >> r; k2 *= m;
    h2 *= m; h2 ^= k2;
    len -= 4;
@ -164,7 +198,7 @@ uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed )

  if(len >= 4)
  {
-    uint32_t k1 = *data++;
+    uint32_t k1 = getblock(data++);
    k1 *= m; k1 ^= k1 >> r; k1 *= m;
    h1 *= m; h1 ^= k1;
    len -= 4;
@ -215,7 +249,7 @@ uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed )

  while(len >= 4)
  {
-    uint32_t k = *(uint32_t*)data;
+    uint32_t k = getblock((const uint32_t *)data);

    mmix(h,k);

@ -278,7 +312,7 @@ public:

    while(len >= 4)
    {
-      uint32_t k = *(uint32_t*)data;
+      uint32_t k = getblock((const uint32_t *)data);

      mmix(m_hash,k);

@ -427,7 +461,7 @@ uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed )

    while(len >= 4)
    {
-      d = *(uint32_t *)data;
+      d = getblock((const uint32_t *)data);
      t = (t >> sr) | (d << sl);

      uint32_t k = t;
@ -492,7 +526,7 @@ uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed )
  {
    while(len >= 4)
    {
-      uint32_t k = *(uint32_t *)data;
+      uint32_t k = getblock((const uint32_t *)data);

      MIX(h,k,m);

--- a/contrib/murmurhash/src/MurmurHash3.cpp
+++ b/contrib/murmurhash/src/MurmurHash3.cpp
@ -55,14 +55,32 @@ inline uint64_t rotl64 ( uint64_t x, int8_t r )

 FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
 {
-  uint32_t res;
-  memcpy(&res, p + i, sizeof(res));
-  return res;
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+  return p[i];
+#else
+  const uint8_t *c = (const uint8_t *)&p[i];
+  return (uint32_t)c[0] |
+	 (uint32_t)c[1] <<  8 |
+	 (uint32_t)c[2] << 16 |
+	 (uint32_t)c[3] << 24;
+#endif
 }

 FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
 {
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  return p[i];
+#else
+  const uint8_t *c = (const uint8_t *)&p[i];
+  return (uint64_t)c[0] |
+	 (uint64_t)c[1] <<  8 |
+	 (uint64_t)c[2] << 16 |
+	 (uint64_t)c[3] << 24 |
+	 (uint64_t)c[4] << 32 |
+	 (uint64_t)c[5] << 40 |
+	 (uint64_t)c[6] << 48 |
+	 (uint64_t)c[7] << 56;
+#endif
 }

 //-----------------------------------------------------------------------------
@ -329,9 +347,13 @@ void MurmurHash3_x64_128 ( const void * key, const size_t len,

  h1 += h2;
  h2 += h1;
-
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  ((uint64_t*)out)[0] = h1;
  ((uint64_t*)out)[1] = h2;
+#else
+  ((uint64_t*)out)[0] = h2;
+  ((uint64_t*)out)[1] = h1;
+#endif
 }

 //-----------------------------------------------------------------------------
--- a/contrib/qpl-cmake/benchmark_sample/client_scripts/allin1_ssb.sh
+++ b/contrib/qpl-cmake/benchmark_sample/client_scripts/allin1_ssb.sh
@ -0,0 +1,530 @@
+#!/bin/bash
+ckhost="localhost"
+ckport=("9000" "9001" "9002" "9003")
+WORKING_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
+OUTPUT_DIR="${WORKING_DIR}/output"
+LOG_DIR="${OUTPUT_DIR}/log"
+RAWDATA_DIR="${WORKING_DIR}/rawdata_dir"
+database_dir="${WORKING_DIR}/database_dir"
+CLIENT_SCRIPTS_DIR="${WORKING_DIR}/client_scripts"
+LOG_PACK_FILE="$(date +%Y-%m-%d-%H-%M-%S)"
+QUERY_FILE="queries_ssb.sql"
+SERVER_BIND_CMD[0]="numactl -m 0 -N 0"
+SERVER_BIND_CMD[1]="numactl -m 0 -N 0"
+SERVER_BIND_CMD[2]="numactl -m 1 -N 1"
+SERVER_BIND_CMD[3]="numactl -m 1 -N 1"
+CLIENT_BIND_CMD=""
+SSB_GEN_FACTOR=20
+TABLE_NAME="lineorder_flat"
+TALBE_ROWS="119994608"
+CODEC_CONFIG="lz4 deflate zstd"
+
+# define instance number
+inst_num=$1
+if [ ! -n "$1" ]; then
+        echo "Please clarify instance number from 1,2,3 or 4"
+        exit 1
+else
+        echo "Benchmarking with instance number:$1"
+fi
+
+# Create the output, log and raw-data directories when they are missing.
+# The paths are quoted because WORKING_DIR is derived from the script location
+# and may contain spaces; -p makes the call a no-op for directories that exist.
+mkdir -p "$OUTPUT_DIR" "$LOG_DIR" "$RAWDATA_DIR"
+
+# define different directories
+dir_server=("" "_s2" "_s3" "_s4")
+ckreadSql="
+    CREATE TABLE customer
+    (
+            C_CUSTKEY       UInt32,
+            C_NAME          String,
+            C_ADDRESS       String,
+            C_CITY          LowCardinality(String),
+            C_NATION        LowCardinality(String),
+            C_REGION        LowCardinality(String),
+            C_PHONE         String,
+            C_MKTSEGMENT    LowCardinality(String)
+    )
+    ENGINE = MergeTree ORDER BY (C_CUSTKEY);
+
+    CREATE TABLE lineorder
+    (
+        LO_ORDERKEY             UInt32,
+        LO_LINENUMBER           UInt8,
+        LO_CUSTKEY              UInt32,
+        LO_PARTKEY              UInt32,
+        LO_SUPPKEY              UInt32,
+        LO_ORDERDATE            Date,
+        LO_ORDERPRIORITY        LowCardinality(String),
+        LO_SHIPPRIORITY         UInt8,
+        LO_QUANTITY             UInt8,
+        LO_EXTENDEDPRICE        UInt32,
+        LO_ORDTOTALPRICE        UInt32,
+        LO_DISCOUNT             UInt8,
+        LO_REVENUE              UInt32,
+        LO_SUPPLYCOST           UInt32,
+        LO_TAX                  UInt8,
+        LO_COMMITDATE           Date,
+        LO_SHIPMODE             LowCardinality(String)
+    )
+    ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);
+
+    CREATE TABLE part
+    (
+            P_PARTKEY       UInt32,
+            P_NAME          String,
+            P_MFGR          LowCardinality(String),
+            P_CATEGORY      LowCardinality(String),
+            P_BRAND         LowCardinality(String),
+            P_COLOR         LowCardinality(String),
+            P_TYPE          LowCardinality(String),
+            P_SIZE          UInt8,
+            P_CONTAINER     LowCardinality(String)
+    )
+    ENGINE = MergeTree ORDER BY P_PARTKEY;
+
+    CREATE TABLE supplier
+    (
+            S_SUPPKEY       UInt32,
+            S_NAME          String,
+            S_ADDRESS       String,
+            S_CITY          LowCardinality(String),
+            S_NATION        LowCardinality(String),
+            S_REGION        LowCardinality(String),
+            S_PHONE         String
+    )
+    ENGINE = MergeTree ORDER BY S_SUPPKEY;
+"
+supplier_table="
+   CREATE TABLE supplier
+    (
+            S_SUPPKEY       UInt32,
+            S_NAME          String,
+            S_ADDRESS       String,
+            S_CITY          LowCardinality(String),
+            S_NATION        LowCardinality(String),
+            S_REGION        LowCardinality(String),
+            S_PHONE         String
+    )
+    ENGINE = MergeTree ORDER BY S_SUPPKEY;
+"
+part_table="
+    CREATE TABLE part
+    (
+            P_PARTKEY       UInt32,
+            P_NAME          String,
+            P_MFGR          LowCardinality(String),
+            P_CATEGORY      LowCardinality(String),
+            P_BRAND         LowCardinality(String),
+            P_COLOR         LowCardinality(String),
+            P_TYPE          LowCardinality(String),
+            P_SIZE          UInt8,
+            P_CONTAINER     LowCardinality(String)
+    )
+    ENGINE = MergeTree ORDER BY P_PARTKEY;
+"
+# DDL for the SSB "lineorder" table: partitioned by the year of LO_ORDERDATE
+# and ordered by (LO_ORDERDATE, LO_ORDERKEY).
+# insert_data runs this statement before loading lineorder.tbl into the table.
+lineorder_table="
+    CREATE TABLE lineorder
+    (
+        LO_ORDERKEY             UInt32,
+        LO_LINENUMBER           UInt8,
+        LO_CUSTKEY              UInt32,
+        LO_PARTKEY              UInt32,
+        LO_SUPPKEY              UInt32,
+        LO_ORDERDATE            Date,
+        LO_ORDERPRIORITY        LowCardinality(String),
+        LO_SHIPPRIORITY         UInt8,
+        LO_QUANTITY             UInt8,
+        LO_EXTENDEDPRICE        UInt32,
+        LO_ORDTOTALPRICE        UInt32,
+        LO_DISCOUNT             UInt8,
+        LO_REVENUE              UInt32,
+        LO_SUPPLYCOST           UInt32,
+        LO_TAX                  UInt8,
+        LO_COMMITDATE           Date,
+        LO_SHIPMODE             LowCardinality(String)
+    )
+    ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);
+"
+# DDL for the SSB "customer" table, ordered by C_CUSTKEY.
+# insert_data runs this statement before loading customer.tbl into the table.
+customer_table="
+    CREATE TABLE customer
+    (
+            C_CUSTKEY       UInt32,
+            C_NAME          String,
+            C_ADDRESS       String,
+            C_CITY          LowCardinality(String),
+            C_NATION        LowCardinality(String),
+            C_REGION        LowCardinality(String),
+            C_PHONE         String,
+            C_MKTSEGMENT    LowCardinality(String)
+    )
+    ENGINE = MergeTree ORDER BY (C_CUSTKEY);
+"
+
+# Multi-statement script that builds the denormalised "lineorder_flat" table:
+#   1. sets max_memory_usage to 20000000000 for the session,
+#   2. creates lineorder_flat from lineorder inner-joined with customer,
+#      supplier and part (CREATE TABLE ... AS SELECT),
+#   3. prints the max_memory_usage setting.
+# The source tables must already exist and be populated when this runs.
+lineorder_flat_table="
+    SET max_memory_usage = 20000000000;
+    CREATE TABLE lineorder_flat
+    ENGINE = MergeTree
+    PARTITION BY toYear(LO_ORDERDATE)
+    ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
+    SELECT
+        l.LO_ORDERKEY AS LO_ORDERKEY,
+        l.LO_LINENUMBER AS LO_LINENUMBER,
+        l.LO_CUSTKEY AS LO_CUSTKEY,
+        l.LO_PARTKEY AS LO_PARTKEY,
+        l.LO_SUPPKEY AS LO_SUPPKEY,
+        l.LO_ORDERDATE AS LO_ORDERDATE,
+        l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
+        l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
+        l.LO_QUANTITY AS LO_QUANTITY,
+        l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
+        l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
+        l.LO_DISCOUNT AS LO_DISCOUNT,
+        l.LO_REVENUE AS LO_REVENUE,
+        l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
+        l.LO_TAX AS LO_TAX,
+        l.LO_COMMITDATE AS LO_COMMITDATE,
+        l.LO_SHIPMODE AS LO_SHIPMODE,
+        c.C_NAME AS C_NAME,
+        c.C_ADDRESS AS C_ADDRESS,
+        c.C_CITY AS C_CITY,
+        c.C_NATION AS C_NATION,
+        c.C_REGION AS C_REGION,
+        c.C_PHONE AS C_PHONE,
+        c.C_MKTSEGMENT AS C_MKTSEGMENT,
+        s.S_NAME AS S_NAME,
+        s.S_ADDRESS AS S_ADDRESS,
+        s.S_CITY AS S_CITY,
+        s.S_NATION AS S_NATION,
+        s.S_REGION AS S_REGION,
+        s.S_PHONE AS S_PHONE,
+        p.P_NAME AS P_NAME,
+        p.P_MFGR AS P_MFGR,
+        p.P_CATEGORY AS P_CATEGORY,
+        p.P_BRAND AS P_BRAND,
+        p.P_COLOR AS P_COLOR,
+        p.P_TYPE AS P_TYPE,
+        p.P_SIZE AS P_SIZE,
+        p.P_CONTAINER AS P_CONTAINER
+    FROM lineorder AS l
+    INNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
+    INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
+    INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY;
+    show settings ilike 'max_memory_usage';
+"
+ 
+# Helper for insert_data: create one SSB table and bulk-load its raw data file.
+#   $1 - table name; the data file is ${RAWDATA_DIR}/ssb-dbgen/$1.tbl
+#   $2 - CREATE TABLE statement for that table
+#   $3 - native TCP port of the target server instance
+# Uses create_table_prefix / insert_data_prefix as set by insert_data.
+# The load is attempted only when the CREATE succeeded.
+function _create_and_load_table(){
+        echo "${create_table_prefix}\"$2\""
+        ${create_table_prefix}"$2" && {
+                echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$3"
+                ${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$3
+        }
+}
+
+# Create the requested SSB table(s) on one server instance and load the data.
+#   $1 - what to create: all | customer | part | supplier | lineorder | lineorder_flat
+#   $2 - native TCP port of the target clickhouse server instance
+# Reads the globals ckhost, RAWDATA_DIR, ckreadSql and the *_table DDL strings.
+# An unrecognised $1 is reported on stderr and terminates the script with
+# status 1, so a typo in a table name is not mistaken for a successful run.
+function insert_data(){
+        local ddl_var tbl
+        echo "insert_data:$1"
+        create_table_prefix="clickhouse client --host ${ckhost} --port $2 --multiquery -q"
+        # Target the same host as the CREATE statements; without --host the
+        # client would fall back to its default host.
+        insert_data_prefix="clickhouse client --host ${ckhost} --query "
+        case $1 in
+          all)
+                # ckreadSql creates the four base tables in one multiquery call.
+                ${create_table_prefix}"$ckreadSql" && {
+                        for tbl in customer part supplier lineorder
+                        do
+                                ${insert_data_prefix} "INSERT INTO ${tbl} FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/${tbl}.tbl --port=$2
+                        done
+                }
+                ${create_table_prefix}"${lineorder_flat_table}"
+          ;;
+          customer|part|supplier|lineorder)
+                # Indirect expansion selects customer_table, part_table, ... by name.
+                ddl_var="${1}_table"
+                _create_and_load_table "$1" "${!ddl_var}" "$2"
+          ;;
+          lineorder_flat)
+                # Built from the base tables by a CREATE ... AS SELECT; no file to load.
+                echo ${create_table_prefix}"${lineorder_flat_table}"
+                ${create_table_prefix}"${lineorder_flat_table}"
+                return 0
+          ;;
+          *)
+                echo "insert_data: unknown table '$1'" >&2
+                exit 1
+                ;;
+
+        esac
+}
+
+# Probe a table by selecting one row from it.
+#   $1 - table name
+#   $2 - native TCP port of the server instance
+# Returns the exit status of the clickhouse client call.
+function check_sql(){
+        printf -v select_sql 'select * from %s limit 1' "$1"
+        clickhouse client --host ${ckhost} --port $2 --multiquery -q"${select_sql}"
+}
+
+# Ensure every server instance holds the SSB tables, loading the missing ones,
+# then compare the row count of ${TABLE_NAME} with ${TALBE_ROWS} per instance.
+#   $1 - optional space-separated list of tables to check
+#        (default: customer part supplier lineorder lineorder_flat)
+# Exits the script with status 1 on the first instance whose count differs.
+# Sleeps 10 s at the end when at least one table had to be loaded.
+function check_table(){
+        checknum=0
+        source_tables="customer part supplier lineorder lineorder_flat"
+        test_tables=${1:-${source_tables}}
+        echo "Checking table data required in server..."
+        for i in $(seq 0 $((inst_num - 1)))
+        do
+                for j in ${test_tables}
+                do
+                        # A failing probe means the table is absent: create and load it.
+                        if ! check_sql $j ${ckport[i]} &> /dev/null; then
+                                checknum=$((checknum + 1))
+                                insert_data "$j" ${ckport[i]}
+                        fi
+                done
+        done
+
+        for i in $(seq 0 $((inst_num - 1)))
+        do
+                echo "clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q\"select count() from ${TABLE_NAME};\""
+                var=$(clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q"select count() from ${TABLE_NAME};")
+                # An empty or non-numeric count makes the test itself fail and is
+                # therefore also treated as an integrity failure.
+                if ! [ $var -eq $TALBE_ROWS ];then
+                        echo  "Instance_${i} Table data integrity check Failed -> Rows:$var"
+                        exit 1
+                fi
+                echo "Instance_${i} Table data integrity check OK -> Rows:$var"
+        done
+        if ((checknum > 0));then
+                echo "Need sleep 10s after first table data insertion...$checknum"
+                sleep 10
+        fi
+}
+
+# Wait up to about 10 s for a server to start listening on a TCP port.
+#   $1 - port number to look for in the `netstat -nltp` listing
+# Polls once per second (sleeping before each poll). Exits the script with
+# status 1 when no listener appears within ten polls.
+function check_instance(){
+        # Local loop variable: callers invoke this from inside their own
+        # `for i` loops, so the global index must not be overwritten here.
+        local attempt
+        instance_alive=0
+        for attempt in {1..10}
+        do
+                sleep 1
+                # Match ":<port>" followed by whitespace, i.e. the local address
+                # column. A bare substring match would also hit PIDs or longer
+                # port numbers that merely contain the digits.
+                if netstat -nltp | grep -q ":${1}[[:space:]]"; then
+                        instance_alive=1
+                        break
+                fi
+        done
+
+        if [ $instance_alive -eq 0 ];then
+                echo "check_instance -> clickhouse server instance faild to launch due to 10s timeout!"
+                exit 1
+        else
+                echo "check_instance -> clickhouse server instance launch successfully!"
+        fi
+}
+
+# Start every server instance for the data-loading phase.
+#   $1 - codec name; instance <n> runs from ${database_dir}/$1${dir_server[n]}
+#        with config_$1${dir_server[n]}.xml
+# stdout and stderr of each server go to ${LOG_DIR}/$1_<n>_server_log. After
+# each launch check_instance waits for the instance's port to be listening.
+function start_clickhouse_for_insertion(){
+        local idx inst_suffix
+        echo "start_clickhouse_for_insertion"
+        for idx in $(seq 0 $((inst_num - 1)))
+        do
+                inst_suffix="${1}${dir_server[idx]}"
+                echo "cd ${database_dir}/${inst_suffix}"
+                echo "${SERVER_BIND_CMD[idx]} clickhouse server -C config_${inst_suffix}.xml >&${LOG_DIR}/${1}_${idx}_server_log& > /dev/null"
+
+                # The server is started from its own directory.
+                cd ${database_dir}/${inst_suffix}
+                # Runs in the background; the trailing "> /dev/null" after "&" is a
+                # separate, empty command.
+                ${SERVER_BIND_CMD[idx]} clickhouse server -C config_${inst_suffix}.xml >&${LOG_DIR}/${1}_${idx}_server_log& > /dev/null
+                check_instance ${ckport[idx]}
+        done
+}
+
+# Start every server instance for the stress phase, discarding server output.
+#   $1 - codec name; instance <n> runs from ${database_dir}/$1${dir_server[n]}
+#        with config_$1${dir_server[n]}.xml
+# After each launch check_instance waits for the instance's port to be listening.
+function start_clickhouse_for_stressing(){
+        local idx inst_suffix
+        echo "start_clickhouse_for_stressing"
+        for idx in $(seq 0 $((inst_num - 1)))
+        do
+                inst_suffix="${1}${dir_server[idx]}"
+                echo "cd ${database_dir}/${inst_suffix}"
+                echo "${SERVER_BIND_CMD[idx]} clickhouse server -C config_${inst_suffix}.xml >&/dev/null&"
+
+                # The server is started from its own directory.
+                cd ${database_dir}/${inst_suffix}
+                ${SERVER_BIND_CMD[idx]} clickhouse server -C config_${inst_suffix}.xml >&/dev/null&
+                check_instance ${ckport[idx]}
+        done
+}
+# Install OS and Python prerequisites; their output is discarded.
+yum -y install git make gcc sudo net-tools &> /dev/null
+pip3 install clickhouse_driver numpy &> /dev/null
+# Clone ssb-dbgen unless it is already present, then work from inside it.
+{ test -d ${RAWDATA_DIR}/ssb-dbgen || git clone https://github.com/vadimtk/ssb-dbgen.git ${RAWDATA_DIR}/ssb-dbgen; } && cd ${RAWDATA_DIR}/ssb-dbgen
+
+# Build dbgen when the binary is missing; a failed build skips data generation.
+ssb_dbgen_ready=1
+if [ ! -f ${RAWDATA_DIR}/ssb-dbgen/dbgen ];then
+        make || ssb_dbgen_ready=0
+fi
+
+# Generate each missing <table>.tbl; the part after ":" is the dbgen -T selector.
+if [ $ssb_dbgen_ready -eq 1 ];then
+        for ssb_tbl_spec in customer:c part:p supplier:s date:d lineorder:l
+        do
+                test -f ${RAWDATA_DIR}/ssb-dbgen/${ssb_tbl_spec%%:*}.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T ${ssb_tbl_spec##*:}
+        done
+fi
+
+# Exactly five *.tbl files are expected afterwards.
+filenum=$(find ${RAWDATA_DIR}/ssb-dbgen/ -name "*.tbl" | wc -l)
+
+if [ $filenum -ne 5 ];then
+        echo "generate ssb data file *.tbl faild"
+        exit 1
+fi
+
+# Stop all clickhouse processes and confirm no instance port is still listening.
+# Runs `pkill clickhouse` up to twice, waiting 5 s after each successful pkill,
+# and inspects every port in ckport via `netstat -nltp`.
+# Exits the script with status 1 when a port is still listening afterwards.
+function kill_instance(){
+        # Separate local indexes: the original reused one variable for both
+        # the retry loop and the per-instance loop.
+        local attempt idx
+        instance_alive=1
+        for attempt in 1 2
+        do
+                pkill clickhouse && sleep 5
+                instance_alive=0
+                for idx in $(seq 0 $((inst_num - 1)))
+                do
+                        # Match ":<port>" followed by whitespace so that PIDs or longer
+                        # port numbers containing the same digits do not count.
+                        if netstat -nltp | grep -q ":${ckport[idx]}[[:space:]]"; then
+                                instance_alive=1
+                                break
+                        fi
+                done
+                if [ $instance_alive -eq 0 ];then
+                        break
+                fi
+        done
+        if [ $instance_alive -eq 0 ];then
+                echo "kill_instance OK!"
+        else
+                echo "kill_instance Failed -> clickhouse server instance still alive due to 10s timeout"
+                exit 1
+        fi
+}
+
+# Run the complete benchmark for one codec configuration.
+#   $1 - codec name; instance <n> uses ${database_dir}/$1${dir_server[n]} and
+#        config_$1${dir_server[n]}.xml
+# Phase 1: start the servers with logging, load/verify the tables, stop them.
+# Phase 2: restart the servers and run client_stressing_test.py against them.
+# Exits the script with status 1 when a config file is missing, when an
+# instance does not answer before the stress run, or when the client log does
+# not contain exactly one "Finished" line.
+function run_test(){
+        local idx probe_failed
+        is_xml=0
+        for idx in $(seq 0 $((inst_num - 1)))
+        do
+                if [ -f ${database_dir}/${1}${dir_server[idx]}/config_${1}${dir_server[idx]}.xml ]; then
+                        is_xml=$((is_xml + 1))
+                fi
+        done
+        if ! [ $is_xml -eq $inst_num ];then
+                echo "clickhouse server start fail -> Please check xml files required in ${database_dir} for each instance"
+                exit 1
+        fi
+
+        echo "Benchmark with $inst_num instance"
+        start_clickhouse_for_insertion ${1}
+
+        # Record a failure of ANY instance; "$?" after the loop would only
+        # reflect the last instance probed.
+        probe_failed=0
+        for idx in $(seq 0 $((inst_num - 1)))
+        do
+                clickhouse client --host ${ckhost} --port ${ckport[idx]} -m -q"show databases;" >/dev/null || probe_failed=1
+        done
+        if [ $probe_failed -eq 0 ];then
+                check_table
+        fi
+        kill_instance
+
+        if [ "$1" == "deflate" ];then
+                # start_clickhouse_for_insertion writes one log per instance named
+                # ${1}_<index>_server_log, so scan all of them.
+                deflatemsg=$(cat ${LOG_DIR}/${1}_*_server_log 2>/dev/null | grep DeflateJobHWPool)
+                if [ -n "$deflatemsg" ];then
+                        echo ------------------------------------------------------
+                        echo $deflatemsg
+                        echo ------------------------------------------------------
+                fi
+        fi
+        echo "Check table data required in server_${1} -> Done! "
+
+        start_clickhouse_for_stressing ${1}
+        probe_failed=0
+        for idx in $(seq 0 $((inst_num - 1)))
+        do
+                clickhouse client --host ${ckhost} --port ${ckport[idx]} -m -q"show databases;" >/dev/null || probe_failed=1
+        done
+        if [ $probe_failed -ne 0 ];then
+                echo "${1} clickhouse server start fail"
+                exit 1
+        fi
+
+        test -d ${CLIENT_SCRIPTS_DIR}  && cd ${CLIENT_SCRIPTS_DIR}
+        echo "Client stressing... "
+        echo "${CLIENT_BIND_CMD} python3 client_stressing_test.py ${QUERY_FILE} $inst_num &> ${LOG_DIR}/${1}.log"
+        ${CLIENT_BIND_CMD} python3 client_stressing_test.py ${QUERY_FILE} $inst_num &> ${LOG_DIR}/${1}.log
+        echo "Completed client stressing, checking log... "
+        finish_log=`grep "Finished" ${LOG_DIR}/${1}.log | wc -l`
+        # The servers are stopped whether or not the client finished.
+        kill_instance
+        if [ $finish_log -eq 1 ] ;then
+                test -f ${LOG_DIR}/${1}.log && echo  "${1}.log ===> ${LOG_DIR}/${1}.log"
+        else
+                echo "No find 'Finished' in client log -> Performance test may fail"
+                exit 1
+        fi
+}
+# Remove everything matched by "*" inside ${LOG_DIR} (dot-files are not matched).
+# Does nothing when ${LOG_DIR} is not a directory; otherwise the working
+# directory is left at ${LOG_DIR}.
+function clear_log(){
+        [ -d "$LOG_DIR" ] || return 0
+        cd ${LOG_DIR} && rm -rf *
+}
+
+# Copy the current ${LOG_DIR} into the per-codec folder of the log pack.
+#   $1 - codec name, used as the sub-folder under ${OUTPUT_DIR}/${LOG_PACK_FILE}
+# Changes the working directory to ${OUTPUT_DIR}.
+function gather_log_for_codec(){
+        local codec_subdir=${LOG_PACK_FILE}/${1}
+        cd ${OUTPUT_DIR} && mkdir -p ${codec_subdir}
+        cp -rf ${LOG_DIR} ${OUTPUT_DIR}/${codec_subdir}
+}
+
+# Add run.log (when it exists) to the log pack and tell the user where the
+# collected logs are.
+function pack_log(){
+        local pack_dir=${OUTPUT_DIR}/${LOG_PACK_FILE}
+        [ -e "${OUTPUT_DIR}/run.log" ] && cp ${OUTPUT_DIR}/run.log ${pack_dir}/
+        echo "Please check all log information in ${pack_dir}"
+}
+
+# Check that IAA accelerator devices are visible and print environment details.
+# Counts the lines of `accel-config list` that contain "iax"; exits the script
+# with status 1 when there are none. Then prints the accel-config version, the
+# clickhouse server version and the kernel log lines containing "dxd".
+function setup_check(){
+        # One query is enough: the former nested re-check repeated the very same
+        # command immediately and could not yield a different answer in practice.
+        iax_dev_num=$(accel-config list | grep -c iax)
+        if [ $iax_dev_num -eq 0 ] ;then
+                echo "No IAA devices available -> Please check IAA hardware setup manually!"
+                exit 1
+        fi
+        echo "IAA enabled devices number:$iax_dev_num"
+
+        libaccel_version=`accel-config -v`
+        clickhouser_version=`clickhouse server --version`
+        kernel_dxd_log=`dmesg | grep dxd`
+        echo "libaccel_version:$libaccel_version"
+        echo "clickhouser_version:$clickhouser_version"
+        echo -e "idxd section in kernel log:\n$kernel_dxd_log"
+}
+
+# Entry point: verify the setup, then benchmark each configured codec in turn.
+setup_check
+# Exported so that the server processes started below inherit it.
+export CLICKHOUSE_WATCHDOG_ENABLE=0
+for codec in ${CODEC_CONFIG[@]}
+do
+        # Start each codec with an empty log directory and archive it afterwards.
+        clear_log
+        echo "run test------------$codec"
+        run_test $codec
+        gather_log_for_codec $codec
+done
+
+pack_log
+echo "Done."
--- a/contrib/qpl-cmake/benchmark_sample/client_scripts/client_stressing_test.py
+++ b/contrib/qpl-cmake/benchmark_sample/client_scripts/client_stressing_test.py
@ -0,0 +1,278 @@
+from operator import eq
+import os
+import random
+import time
+import sys
+from clickhouse_driver import Client
+import numpy as np
+import subprocess
+import multiprocessing
+from multiprocessing import Manager
+
+warmup_runs = 10
+calculated_runs = 10
+seconds = 30
+max_instances_number = 8
+retest_number = 3
+retest_tolerance = 10
+
+
+def checkInt(str):
+    """Return True when ``int(str)`` succeeds, False when it raises ValueError."""
+    try:
+        int(str)
+    except ValueError:
+        return False
+    else:
+        return True
+
+
+def setup_client(index):
+    """Create a clickhouse_driver client for server instance ``index``.
+
+    Instances 0-3 map to ports 9000-9003. From index 4 on, four port numbers
+    are skipped (index 4 -> 9008); presumably 9004-9007 are left free because
+    the sample configs use 9004/9005 as ``mysql_port`` -- verify.
+
+    The client connects to localhost as user ``default`` with an empty
+    password and sets ``union_default_mode`` to ``DISTINCT`` before it is
+    returned.
+    """
+    if index < 4:
+        port_idx = index
+    else:
+        port_idx = index + 4
+    client = Client(
+        host="localhost",
+        database="default",
+        user="default",
+        password="",
+        # Add the offset numerically. Formatting it into "900%d" produced the
+        # invalid ports "90010" and "90011" for index 6 and 7.
+        port=str(9000 + port_idx),
+    )
+    union_mode_query = "SET union_default_mode='DISTINCT'"
+    client.execute(union_mode_query)
+    return client
+
+
+def warm_client(clientN, clientL, query, loop):
+    """Run ``query`` ``loop`` times on each of the first ``clientN`` clients.
+
+    ``clientL`` is indexed by the integers ``0 .. clientN - 1``. Clients are
+    processed one after another, not concurrently.
+    """
+    c_idx = 0
+    while c_idx < clientN:
+        remaining = loop
+        while remaining > 0:
+            clientL[c_idx].execute(query)
+            remaining -= 1
+        c_idx += 1
+
+
+def read_queries(queries_list):
+    """Parse a queries file into parallel lists of ids and SQL texts.
+
+    Every non-blank line has the form ``<id>$<sql>``. Only the first ``$``
+    separates the id from the query, so a ``$`` inside the SQL is preserved.
+    Blank lines (for example a trailing newline at the end of the file) are
+    skipped.
+
+    Args:
+        queries_list: path of the queries file.
+
+    Returns:
+        ``(queries_id, queries)``: two lists of equal length.
+
+    Raises:
+        IndexError: a non-blank line contains no ``$`` separator.
+    """
+    queries = []
+    queries_id = []
+    with open(queries_list, "r") as f:
+        for line in f:
+            line = line.rstrip()
+            if not line:
+                continue
+            parts = line.split("$", 1)
+            queries_id.append(parts[0])
+            queries.append(parts[1])
+    return queries_id, queries
+
+
+def run_task(client, cname, query, loop, query_latency):
+    """Run ``query`` ``loop`` times on ``client`` and report P95 and QPS.
+
+    After each execution ``client.last_query.elapsed`` is appended to
+    ``query_latency``, the caller-supplied list that collects the results.
+    A summary line tagged with ``cname`` is printed at the end.
+    """
+    began = time.time()
+    for _ in range(loop):
+        client.execute(query)
+        query_latency.append(client.last_query.elapsed)
+    finished = time.time()
+
+    p95 = np.percentile(query_latency, 95)
+    qps = loop / (finished - began)
+    summary = "CLIENT: {0} end. -> P95: %f, qps: %f".format(cname)
+    print(summary % (p95, qps))
+
+
+def run_multi_clients(clientN, clientList, query, loop):
+    """Run ``run_task`` for ``clientN`` clients in parallel processes.
+
+    Each process gets its own manager-backed list for the latencies it
+    records. At most eight clients are supported; reaching a ninth prints an
+    error and exits.
+
+    Returns:
+        ``(totalT, totalP95)``: the wall-clock seconds from before the first
+        process is created until all have been joined, and the 95th
+        percentile of all collected latencies multiplied by 1000.
+    """
+    slot_count = 8
+    client_pids = {}
+    start_time = time.time()
+    manager = multiprocessing.Manager()
+    latency_slots = [manager.list() for _ in range(slot_count)]
+
+    for c_idx in range(clientN):
+        client_name = "Role_%d" % c_idx
+        if c_idx >= slot_count:
+            print("ERROR: CLIENT number dismatch!!")
+            exit()
+        client_pids[c_idx] = multiprocessing.Process(
+            target=run_task,
+            args=(clientList[c_idx], client_name, query, loop, latency_slots[c_idx]),
+        )
+        print("CLIENT: %s start" % client_name)
+        client_pids[c_idx].start()
+
+    for c_idx in range(clientN):
+        client_pids[c_idx].join()
+    totalT = time.time() - start_time
+
+    # Flatten the per-client latencies, slot 0 first.
+    query_latencyTotal = [item for slot in latency_slots for item in slot]
+
+    totalP95 = np.percentile(query_latencyTotal, 95) * 1000
+    return totalT, totalP95
+
+
+def run_task_caculated(client, cname, query, loop):
+    """Execute ``query`` ``loop`` times on ``client``.
+
+    The function returns nothing, so the per-query latency list, timestamps
+    and percentile that used to be computed here were dead code and have been
+    removed. ``cname`` is unused.
+    """
+    for _ in range(loop):
+        client.execute(query)
+
+
+def run_multi_clients_caculated(clientN, clientList, query, loop):
+    """Run ``run_task_caculated`` for ``clientN`` clients in parallel processes.
+
+    Returns the wall-clock seconds from before the first process is created
+    until every process has been joined.
+    """
+    began = time.time()
+    workers = []
+    for c_idx in range(clientN):
+        worker = multiprocessing.Process(
+            target=run_task_caculated,
+            args=(clientList[c_idx], "Role_%d" % c_idx, query, loop),
+        )
+        worker.start()
+        workers.append(worker)
+    for worker in workers:
+        worker.join()
+    return time.time() - began
+
+
+# Command-line entry point:
+#   python3 client_stressing_test.py <queries_file_path> <client_number>
+# For every query in the file: warm up, calibrate the iteration count so that
+# a run lasts about `seconds`, run the stress test (retried up to
+# `retest_number` times when the duration is off by `retest_tolerance` or
+# more) and print a summary line. Prints "###Finished!" at the very end.
+if __name__ == "__main__":
+    client_number = 1
+    queries = list()
+    queries_id = list()
+
+    if len(sys.argv) != 3:
+        print(
+            "usage: python3 client_stressing_test.py [queries_file_path] [client_number]"
+        )
+        sys.exit()
+    else:
+        queries_list = sys.argv[1]
+        # Validate before converting: calling int() first raised ValueError on
+        # a non-numeric argument and the range message below was never shown.
+        if not checkInt(sys.argv[2]):
+            print("client_number should be in [1~%d]" % max_instances_number)
+            sys.exit()
+        client_number = int(sys.argv[2])
+        print(
+            "queries_file_path: %s, client_number: %d" % (queries_list, client_number)
+        )
+        if not os.path.isfile(queries_list) or not os.access(queries_list, os.R_OK):
+            print("please check the right path for queries file")
+            sys.exit()
+        if client_number > max_instances_number or client_number < 1:
+            print("client_number should be in [1~%d]" % max_instances_number)
+            sys.exit()
+
+    client_list = {}
+    queries_id, queries = read_queries(queries_list)
+
+    for c_idx in range(client_number):
+        client_list[c_idx] = setup_client(c_idx)
+    # clear cache
+    os.system("sync; echo 3 > /proc/sys/vm/drop_caches")
+
+    # One untimed pass over all queries before the measured runs.
+    print("###Polit Run Begin")
+    for i in queries:
+        warm_client(client_number, client_list, i, 1)
+    print("###Polit Run End -> Start stressing....")
+
+    query_index = 0
+    for q in queries:
+        print(
+            "\n###START -> Index: %d, ID: %s, Query: %s"
+            % (query_index, queries_id[query_index], q)
+        )
+        warm_client(client_number, client_list, q, warmup_runs)
+        print("###Warm Done!")
+        for j in range(0, retest_number):
+            # Calibration: time `calculated_runs` iterations, then scale the
+            # iteration count so the stress run should take `seconds`.
+            totalT = run_multi_clients_caculated(
+                client_number, client_list, q, calculated_runs
+            )
+            curr_loop = int(seconds * calculated_runs / totalT) + 1
+            print(
+                "###Calculation Done! -> loopN: %d, expected seconds:%d"
+                % (curr_loop, seconds)
+            )
+
+            print("###Stress Running! -> %d iterations......" % curr_loop)
+
+            totalT, totalP95 = run_multi_clients(
+                client_number, client_list, q, curr_loop
+            )
+
+            # Accept the run only when its duration is close to the target.
+            if totalT > (seconds - retest_tolerance) and totalT < (
+                seconds + retest_tolerance
+            ):
+                break
+            else:
+                print(
+                    "###totalT:%d is far way from expected seconds:%d. Run again ->j:%d!"
+                    % (totalT, seconds, j)
+                )
+
+        # Reports the last attempt, even when it missed the tolerance window.
+        print(
+            "###Completed! -> ID: %s, clientN: %d, totalT: %.2f s, latencyAVG: %.2f ms, P95: %.2f ms, QPS_Final: %.2f"
+            % (
+                queries_id[query_index],
+                client_number,
+                totalT,
+                totalT * 1000 / (curr_loop * client_number),
+                totalP95,
+                ((curr_loop * client_number) / totalT),
+            )
+        )
+        query_index += 1
+    print("###Finished!")
--- a/contrib/qpl-cmake/benchmark_sample/client_scripts/queries_ssb.sql
+++ b/contrib/qpl-cmake/benchmark_sample/client_scripts/queries_ssb.sql
@ -0,0 +1,10 @@
+Q1.1$SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;
+Q2.1$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' GROUP BY year,P_BRAND ORDER BY year,P_BRAND;
+Q2.2$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_BRAND >= 'MFGR#2221' AND P_BRAND <= 'MFGR#2228' AND S_REGION = 'ASIA' GROUP BY year,P_BRAND ORDER BY year,P_BRAND;
+Q2.3$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE' GROUP BY year,P_BRAND ORDER BY year,P_BRAND;
+Q3.1$SELECT C_NATION,S_NATION,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND year >= 1992 AND year <= 1997 GROUP BY C_NATION,S_NATION,year ORDER BY year ASC,revenue DESC;
+Q3.2$SELECT C_CITY,S_CITY,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES' AND year >= 1992 AND year <= 1997 GROUP BY C_CITY,S_CITY,year ORDER BY year ASC,revenue DESC;
+Q3.3$SELECT C_CITY,S_CITY,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND year >= 1992 AND year <= 1997 GROUP BY C_CITY,S_CITY,year ORDER BY year ASC,revenue DESC;
+Q4.1$SELECT toYear(LO_ORDERDATE) AS year,C_NATION,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year,C_NATION ORDER BY year ASC,C_NATION ASC;
+Q4.2$SELECT toYear(LO_ORDERDATE) AS year,S_NATION,P_CATEGORY,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (year = 1997 OR year = 1998) AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year,S_NATION,P_CATEGORY ORDER BY year ASC,S_NATION ASC,P_CATEGORY ASC;
+Q4.3$SELECT toYear(LO_ORDERDATE) AS year,S_CITY,P_BRAND,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE S_NATION = 'UNITED STATES' AND (year = 1997 OR year = 1998) AND P_CATEGORY = 'MFGR#14' GROUP BY year,S_CITY,P_BRAND ORDER BY year ASC,S_CITY ASC,P_BRAND ASC;
--- a/contrib/qpl-cmake/benchmark_sample/client_scripts/run_ssb.sh
+++ b/contrib/qpl-cmake/benchmark_sample/client_scripts/run_ssb.sh
@ -0,0 +1,6 @@
+# Launcher: runs allin1_ssb.sh and stores its standard output in
+# <parent of this script's directory>/output/run.log.
+WORKING_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
+# Quoted everywhere so that a checkout path containing spaces works; -p makes
+# the separate existence test unnecessary.
+mkdir -p "${WORKING_DIR}/output"
+# "2" is a positional argument for allin1_ssb.sh, not a redirect of file
+# descriptor 2: the space before ">" makes this a stdout redirect.
+# NOTE(review): the argument is presumably the instance count -- confirm.
+# allin1_ssb.sh is looked up in the current directory, so run this script from
+# the directory that contains it.
+bash allin1_ssb.sh 2 > "${WORKING_DIR}/output/run.log"
+echo "Please check log in: ${WORKING_DIR}/output/run.log"
--- a/contrib/qpl-cmake/benchmark_sample/database_dir/deflate/config_deflate.xml
+++ b/contrib/qpl-cmake/benchmark_sample/database_dir/deflate/config_deflate.xml
@ -0,0 +1,49 @@
+<!-- This file was generated automatically.
+     Do not edit it: it is likely to be discarded and generated again before it's read next time.
+     Files used to generate this file:
+       config.xml      -->
+
+<!-- Config that is used when server is run without config file. --><clickhouse>
+    <logger>
+        <level>trace</level>
+        <console>true</console>
+    </logger>
+
+    <http_port>8123</http_port>
+    <tcp_port>9000</tcp_port>
+    <mysql_port>9004</mysql_port>
+
+    <path>./</path>
+
+    <uncompressed_cache_size>8589934592</uncompressed_cache_size>
+    <mark_cache_size>5368709120</mark_cache_size>
+    <mlock_executable>true</mlock_executable>
+
+    <compression>
+        <case>
+            <method>deflate_qpl</method>
+        </case>
+    </compression>
+
+    <users>
+        <default>
+            <password/>
+
+            <networks>
+                <ip>::/0</ip>
+            </networks>
+
+            <profile>default</profile>
+            <quota>default</quota>
+            <access_management>1</access_management>
+        </default>
+    </users>
+
+    <profiles>
+        <default/>
+    </profiles>
+
+    <quotas>
+        <default/>
+    </quotas>
+</clickhouse>
--- a/contrib/qpl-cmake/benchmark_sample/database_dir/deflate_s2/config_deflate_s2.xml
+++ b/contrib/qpl-cmake/benchmark_sample/database_dir/deflate_s2/config_deflate_s2.xml
@ -0,0 +1,49 @@
+<!-- This file was generated automatically.
+     Do not edit it: it is likely to be discarded and generated again before it's read next time.
+     Files used to generate this file:
+       config.xml      -->
+
+<!-- Config that is used when server is run without config file. --><clickhouse>
+    <logger>
+        <level>trace</level>
+        <console>true</console>
+    </logger>
+
+    <http_port>8124</http_port>
+    <tcp_port>9001</tcp_port>
+    <mysql_port>9005</mysql_port>
+
+    <path>./</path>
+
+    <uncompressed_cache_size>8589934592</uncompressed_cache_size>
+    <mark_cache_size>5368709120</mark_cache_size>
+    <mlock_executable>true</mlock_executable>
+
+    <compression>
+        <case>
+            <method>deflate_qpl</method>
+        </case>
+    </compression>
+
+    <users>
+        <default>
+            <password/>
+
+            <networks>
+                <ip>::/0</ip>
+            </networks>
+
+            <profile>default</profile>
+            <quota>default</quota>
+            <access_management>1</access_management>
+        </default>
+    </users>
+
+    <profiles>
+        <default/>
+    </profiles>
+
+    <quotas>
+        <default/>
+    </quotas>
+</clickhouse>
--- a/contrib/qpl-cmake/benchmark_sample/database_dir/lz4/config_lz4.xml
+++ b/contrib/qpl-cmake/benchmark_sample/database_dir/lz4/config_lz4.xml
@ -0,0 +1,49 @@
+<!-- This file was generated automatically.
+     Do not edit it: it is likely to be discarded and generated again before it's read next time.
+     Files used to generate this file:
+       config.xml      -->
+
+<!-- Config that is used when server is run without config file. --><clickhouse>
+    <logger>
+        <level>trace</level>
+        <console>true</console>
+    </logger>
+
+    <http_port>8123</http_port>
+    <tcp_port>9000</tcp_port>
+    <mysql_port>9004</mysql_port>
+
+    <path>./</path>
+
+    <uncompressed_cache_size>8589934592</uncompressed_cache_size>
+    <mark_cache_size>5368709120</mark_cache_size>
+    <mlock_executable>true</mlock_executable>
+
+    <compression>
+        <case>
+            <method>lz4</method>
+        </case>
+    </compression>
+
+    <users>
+        <default>
+            <password/>
+
+            <networks>
+                <ip>::/0</ip>
+            </networks>
+
+            <profile>default</profile>
+            <quota>default</quota>
+            <access_management>1</access_management>
+        </default>
+    </users>
+
+    <profiles>
+        <default/>
+    </profiles>
+
+    <quotas>
+        <default/>
+    </quotas>
+</clickhouse>
--- a/contrib/qpl-cmake/benchmark_sample/database_dir/lz4_s2/config_lz4_s2.xml
+++ b/contrib/qpl-cmake/benchmark_sample/database_dir/lz4_s2/config_lz4_s2.xml
@ -0,0 +1,49 @@
+<!-- This file was generated automatically.
+     Do not edit it: it is likely to be discarded and generated again before it's read next time.
+     Files used to generate this file:
+       config.xml      -->
+
+<!-- Config that is used when server is run without config file. --><clickhouse>
+    <logger>
+        <level>trace</level>
+        <console>true</console>
+    </logger>
+
+    <http_port>8124</http_port>
+    <tcp_port>9001</tcp_port>
+    <mysql_port>9005</mysql_port>
+
+    <path>./</path>
+
+    <uncompressed_cache_size>8589934592</uncompressed_cache_size>
+    <mark_cache_size>5368709120</mark_cache_size>
+    <mlock_executable>true</mlock_executable>
+
+    <compression>
+        <case>
+            <method>lz4</method>
+        </case>
+    </compression>
+
+    <users>
+        <default>
+            <password/>
+
+            <networks>
+                <ip>::/0</ip>
+            </networks>
+
+            <profile>default</profile>
+            <quota>default</quota>
+            <access_management>1</access_management>
+        </default>
+    </users>
+
+    <profiles>
+        <default/>
+    </profiles>
+
+    <quotas>
+        <default/>
+    </quotas>
+</clickhouse>
--- a/contrib/qpl-cmake/benchmark_sample/database_dir/zstd/config_zstd.xml
+++ b/contrib/qpl-cmake/benchmark_sample/database_dir/zstd/config_zstd.xml
@ -0,0 +1,49 @@
+<!-- This file was generated automatically.
+     Do not edit it: it is likely to be discarded and generated again before it's read next time.
+     Files used to generate this file:
+       config.xml      -->
+
+<!-- Config that is used when server is run without config file. --><clickhouse>
+    <logger>
+        <level>trace</level>
+        <console>true</console>
+    </logger>
+
+    <http_port>8123</http_port>
+    <tcp_port>9000</tcp_port>
+    <mysql_port>9004</mysql_port>
+
+    <path>./</path>
+
+    <uncompressed_cache_size>8589934592</uncompressed_cache_size>
+    <mark_cache_size>5368709120</mark_cache_size>
+    <mlock_executable>true</mlock_executable>
+
+    <compression>
+        <case>
+            <method>zstd</method>
+        </case>
+    </compression>
+
+    <users>
+        <default>
+            <password/>
+
+            <networks>
+                <ip>::/0</ip>
+            </networks>
+
+            <profile>default</profile>
+            <quota>default</quota>
+            <access_management>1</access_management>
+        </default>
+    </users>
+
+    <profiles>
+        <default/>
+    </profiles>
+
+    <quotas>
+        <default/>
+    </quotas>
+</clickhouse>
--- a/contrib/qpl-cmake/benchmark_sample/database_dir/zstd_s2/config_zstd_s2.xml
+++ b/contrib/qpl-cmake/benchmark_sample/database_dir/zstd_s2/config_zstd_s2.xml
@ -0,0 +1,49 @@
+<!-- This file was generated automatically.
+     Do not edit it: it is likely to be discarded and generated again before it's read next time.
+     Files used to generate this file:
+       config.xml      -->
+
+<!-- Config that is used when server is run without config file. --><clickhouse>
+    <logger>
+        <level>trace</level>
+        <console>true</console>
+    </logger>
+
+    <http_port>8124</http_port>
+    <tcp_port>9001</tcp_port>
+    <mysql_port>9005</mysql_port>
+
+    <path>./</path>
+
+    <uncompressed_cache_size>8589934592</uncompressed_cache_size>
+    <mark_cache_size>5368709120</mark_cache_size>
+    <mlock_executable>true</mlock_executable>
+
+    <compression>
+        <case>
+            <method>zstd</method>
+        </case>
+    </compression>
+
+    <users>
+        <default>
+            <password/>
+
+            <networks>
+                <ip>::/0</ip>
+            </networks>
+
+            <profile>default</profile>
+            <quota>default</quota>
+            <access_management>1</access_management>
+        </default>
+    </users>
+
+    <profiles>
+        <default/>
+    </profiles>
+
+    <quotas>
+        <default/>
+    </quotas>
+</clickhouse>
--- a/contrib/vectorscan
+++ b/contrib/vectorscan
@ -1 +1 @@
-Subproject commit f6250ae3e5a3085000239313ad0689cc1e00cdc2
+Subproject commit b4bba94b1a250603b0b198e0394946e32f6c3f30
--- a/docker/test/performance-comparison/download.sh
+++ b/docker/test/performance-comparison/download.sh
@ -3,7 +3,9 @@ set -ex
 set -o pipefail
 trap "exit" INT TERM
 trap 'kill $(jobs -pr) ||:' EXIT
+S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}
 BUILD_NAME=${BUILD_NAME:-package_release}
+export S3_URL BUILD_NAME

 mkdir db0 ||:
 mkdir left ||:
@ -28,8 +30,9 @@ function download
    # Historically there were various paths for the performance test package.
    # Test all of them.
    declare -a urls_to_try=(
-        "https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
-        "https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/$BUILD_NAME/performance.tgz"
+        "$S3_URL/PRs/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
+        "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
+        "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tgz"
    )

    for path in "${urls_to_try[@]}"
--- a/docker/test/performance-comparison/entrypoint.sh
+++ b/docker/test/performance-comparison/entrypoint.sh
@ -6,11 +6,7 @@ export CHPC_CHECK_START_TIMESTAMP

 S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}
 BUILD_NAME=${BUILD_NAME:-package_release}
-
-COMMON_BUILD_PREFIX="/clickhouse_build_check"
-if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
-    COMMON_BUILD_PREFIX=""
-fi
+export S3_URL BUILD_NAME

 # Sometimes AWS responde with DNS error and it's impossible to retry it with
 # current curl version options.
@ -66,8 +62,9 @@ function find_reference_sha
        # test all of them.
        unset found
        declare -a urls_to_try=(
-            "https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
-            "https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/$BUILD_NAME/performance.tgz"
+            "$S3_URL/PRs/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
+            "$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
+            "$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tgz"
        )
        for path in "${urls_to_try[@]}"
        do
@ -92,10 +89,15 @@ chmod 777 workspace output
 cd workspace

 # Download the package for the version we are going to test.
-if curl_with_retry "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tar.zst"
-then
-    right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tar.zst"
-fi
+# A temporary solution for migrating into PRs directory
+for prefix in "$S3_URL/PRs" "$S3_URL";
+do
+    if curl_with_retry "$prefix/$PR_TO_TEST/$SHA_TO_TEST/$BUILD_NAME/performance.tar.zst"
+    then
+        right_path="$prefix/$PR_TO_TEST/$SHA_TO_TEST/$BUILD_NAME/performance.tar.zst"
+        break
+    fi
+done

 mkdir right
 wget -nv -nd -c "$right_path" -O- | tar -C right --no-same-owner --strip-components=1 --zstd --extract --verbose
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@ -26,6 +26,7 @@ logging.basicConfig(
 total_start_seconds = time.perf_counter()
 stage_start_seconds = total_start_seconds

+
 # Thread executor that does not hides exception that happens during function
 # execution, and rethrows it after join()
 class SafeThread(Thread):
@ -158,6 +159,7 @@ for e in subst_elems:

    available_parameters[name] = values

+
 # Takes parallel lists of templates, substitutes them with all combos of
 # parameters. The set of parameters is determined based on the first list.
 # Note: keep the order of queries -- sometimes we have DROP IF EXISTS
--- a/docker/test/performance-comparison/report.py
+++ b/docker/test/performance-comparison/report.py
@ -670,7 +670,6 @@ if args.report == "main":
    )

 elif args.report == "all-queries":
-
    print((header_template.format()))

    add_tested_commits()
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@ -128,7 +128,7 @@ function run_tests()
    set +e

    if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then
-        clickhouse-test --client="clickhouse-client --use_hedged_requests=0  --allow_experimental_parallel_reading_from_replicas=1 \
+        clickhouse-test --client="clickhouse-client --use_hedged_requests=0  --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \
            --max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \
            -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
        "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
--- a/docker/test/stateful/s3downloader
+++ b/docker/test/stateful/s3downloader
@ -10,31 +10,38 @@ import requests
 import tempfile


-DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com'
+DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com"

 AVAILABLE_DATASETS = {
-    'hits': 'hits_v1.tar',
-    'visits': 'visits_v1.tar',
+    "hits": "hits_v1.tar",
+    "visits": "visits_v1.tar",
 }

 RETRIES_COUNT = 5

+
 def _get_temp_file_name():
-    return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
+    return os.path.join(
+        tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
+    )
+

 def build_url(base_url, dataset):
-    return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset])
+    return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset])
+

 def dowload_with_progress(url, path):
    logging.info("Downloading from %s to temp path %s", url, path)
    for i in range(RETRIES_COUNT):
        try:
-            with open(path, 'wb') as f:
+            with open(path, "wb") as f:
                response = requests.get(url, stream=True)
                response.raise_for_status()
-                total_length = response.headers.get('content-length')
+                total_length = response.headers.get("content-length")
                if total_length is None or int(total_length) == 0:
-                    logging.info("No content-length, will download file without progress")
+                    logging.info(
+                        "No content-length, will download file without progress"
+                    )
                    f.write(response.content)
                else:
                    dl = 0
@ -46,7 +53,11 @@ def dowload_with_progress(url, path):
                        if sys.stdout.isatty():
                            done = int(50 * dl / total_length)
                            percent = int(100 * float(dl) / total_length)
-                            sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
+                            sys.stdout.write(
+                                "\r[{}{}] {}%".format(
+                                    "=" * done, " " * (50 - done), percent
+                                )
+                            )
                            sys.stdout.flush()
            break
        except Exception as ex:
@ -56,14 +67,21 @@ def dowload_with_progress(url, path):
            if os.path.exists(path):
                os.remove(path)
    else:
-        raise Exception("Cannot download dataset from {}, all retries exceeded".format(url))
+        raise Exception(
+            "Cannot download dataset from {}, all retries exceeded".format(url)
+        )

    sys.stdout.write("\n")
    logging.info("Downloading finished")

+
 def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
-    logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path)
-    with tarfile.open(tar_path, 'r') as comp_file:
+    logging.info(
+        "Will unpack data from temp path %s to clickhouse db %s",
+        tar_path,
+        clickhouse_path,
+    )
+    with tarfile.open(tar_path, "r") as comp_file:
        comp_file.extractall(path=clickhouse_path)
    logging.info("Unpack finished")

@ -72,15 +90,21 @@ if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
-        description="Simple tool for dowloading datasets for clickhouse from S3")
+        description="Simple tool for dowloading datasets for clickhouse from S3"
+    )

-    parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys()))
-    parser.add_argument('--url-prefix', default=DEFAULT_URL)
-    parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/')
+    parser.add_argument(
+        "--dataset-names",
+        required=True,
+        nargs="+",
+        choices=list(AVAILABLE_DATASETS.keys()),
+    )
+    parser.add_argument("--url-prefix", default=DEFAULT_URL)
+    parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/")

    args = parser.parse_args()
    datasets = args.dataset_names
-    logging.info("Will fetch following datasets: %s", ', '.join(datasets))
+    logging.info("Will fetch following datasets: %s", ", ".join(datasets))
    for dataset in datasets:
        logging.info("Processing %s", dataset)
        temp_archive_path = _get_temp_file_name()
@ -92,10 +116,11 @@ if __name__ == "__main__":
            logging.info("Some exception occured %s", str(ex))
            raise
        finally:
-            logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path)
+            logging.info(
+                "Will remove downloaded file %s from filesystem if it exists",
+                temp_archive_path,
+            )
            if os.path.exists(temp_archive_path):
                os.remove(temp_archive_path)
        logging.info("Processing of %s finished", dataset)
    logging.info("Fetch finished, enjoy your tables!")
-
-
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@ -170,6 +170,7 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
 fi

 rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server.log ||:
+rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||:
 zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst &

 # Compress tables.
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@ -41,6 +41,9 @@ if [ "$is_tsan_build" -eq "0" ]; then
    export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
    export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
    export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
+
+    export THREAD_FUZZER_EXPLICIT_SLEEP_PROBABILITY=0.01
+    export THREAD_FUZZER_EXPLICIT_MEMORY_EXCEPTION_PROBABILITY=0.01
 fi

 export ZOOKEEPER_FAULT_INJECTION=1
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@ -11,13 +11,14 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
    aspell \
    curl \
    git \
+    file \
    libxml2-utils \
    moreutils \
    python3-fuzzywuzzy \
    python3-pip \
    shellcheck \
    yamllint \
-    && pip3 install black==22.8.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \
+    && pip3 install black==23.1.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \
    && apt-get clean \
    && rm -rf /root/.cache/pip

--- a/docs/en/development/build.md
+++ b/docs/en/development/build.md
@ -159,4 +159,3 @@ The CI checks build the binaries on each commit to [ClickHouse](https://github.c
 1. Find the type of package for your operating system that you need and download the files.

 ![build artifact check](images/find-build-artifact.png)
-
--- a/docs/en/development/building_and_benchmarking_deflate_qpl.md
+++ b/docs/en/development/building_and_benchmarking_deflate_qpl.md
@ -0,0 +1,283 @@
+---
+slug: /en/development/building_and_benchmarking_deflate_qpl
+sidebar_position: 73
+sidebar_label: Building and Benchmarking DEFLATE_QPL
+description: How to build Clickhouse and run benchmark with DEFLATE_QPL Codec
+---
+# Build Clickhouse with DEFLATE_QPL
+- Make sure your target machine meets the [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites) required by QPL
+- Pass the following flag to CMake when building ClickHouse, depending on the capabilities of your target machine:
+``` bash
+cmake -DENABLE_AVX2=1 -DENABLE_QPL=1 ..
+```
+or
+``` bash
+cmake -DENABLE_AVX512=1 -DENABLE_QPL=1 ..
+```
+- For generic requirements, please refer to Clickhouse generic [build instructions](/docs/en/development/build.md)
+
+# Run Benchmark with DEFLATE_QPL
+## Files list
+The folder `benchmark_sample` under [qpl-cmake](https://github.com/ClickHouse/ClickHouse/tree/master/contrib/qpl-cmake) gives an example of how to run the benchmark with Python scripts:
+
+`client_scripts` contains python scripts for running typical benchmark, for example:
+- `client_stressing_test.py`: The python script for query stress test with [1~4] server instances.
+- `queries_ssb.sql`: The file lists all queries for [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema/)
+- `allin1_ssb.sh`: This shell script executes benchmark workflow all in one automatically.
+
+`database_dir` stores the database files for each codec (lz4/deflate/zstd).
+
+## Run benchmark automatically for Star Schema:
+``` bash
+$ cd ./benchmark_sample/client_scripts
+$ sh allin1_ssb.sh
+```
+After it completes, please check all the results in this folder: `./output/`
+
+If you run into a failure, please run the benchmark manually as described in the sections below.
+
+## Definition
+[CLICKHOUSE_EXE] means the path of clickhouse executable program.
+
+## Environment
+- CPU: Sapphire Rapids
+- OS Requirements refer to [System Requirements for QPL](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#system-requirements)
+- IAA Setup refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration)
+- Install python modules:
+``` bash
+pip3 install clickhouse_driver numpy
+```
+[Self-check for IAA]
+``` bash
+$ accel-config list | grep -P 'iax|state'
+```
+Expected output like this:
+``` bash
+    "dev":"iax1",
+    "state":"enabled",
+            "state":"enabled",
+```
+If you see no output, it means IAA is not ready to work. Please check the IAA setup again.
+
+## Generate raw data
+``` bash
+$ cd ./benchmark_sample
+$ mkdir rawdata_dir && cd rawdata_dir
+```
+Use [`dbgen`](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) to generate 100 million rows of data with the parameter:
+`-s 20`
+
+Files like `*.tbl` are expected to be generated under `./benchmark_sample/rawdata_dir/ssb-dbgen`.
+
+## Database setup
+Set up database with LZ4 codec
+
+``` bash
+$ cd ./database_dir/lz4
+$ [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
+$ [CLICKHOUSE_EXE] client
+```
+Here you should see the message `Connected to ClickHouse server` on the console, which means the client has successfully set up a connection with the server.
+
+Complete the three steps below, as described in [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema)
+- Creating tables in ClickHouse
+- Inserting data. Here should use `./benchmark_sample/rawdata_dir/ssb-dbgen/*.tbl` as input data.
+- Converting “star schema” to de-normalized “flat schema”
+
+Set up database with IAA Deflate codec
+
+``` bash
+$ cd ./database_dir/deflate
+$ [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
+$ [CLICKHOUSE_EXE] client
+```
+Complete three steps same as lz4 above
+
+Set up database with ZSTD codec
+
+``` bash
+$ cd ./database_dir/zstd
+$ [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
+$ [CLICKHOUSE_EXE] client
+```
+Complete three steps same as lz4 above
+
+[self-check]
+For each codec (lz4/zstd/deflate), please execute the query below to make sure the databases were created successfully:
+```sql
+select count() from lineorder_flat
+```
+You are expected to see below output:
+```sql
+┌───count()─┐
+│ 119994608 │
+└───────────┘
+```
+[Self-check for IAA Deflate codec]
+The first time you execute an insertion or a query from the client, the ClickHouse server console is expected to print this log:
+```text
+Hardware-assisted DeflateQpl codec is ready!
+```
+If you never find this, but see another log as below:
+```text
+Initialization of hardware-assisted DeflateQpl codec failed
+```
+That means the IAA devices are not ready; you need to check the IAA setup again.
+
+## Benchmark with single instance 
+- Before starting the benchmark, please disable C6 and set the CPU frequency governor to `performance`
+``` bash
+$ cpupower idle-set -d 3
+$ cpupower frequency-set -g performance
+```
+- To eliminate impact of memory bound on cross sockets, we use `numactl` to bind server on one socket and client on another socket.
+- Single instance means single server connected with single client
+
+Now run benchmark for LZ4/Deflate/ZSTD respectively:
+
+LZ4:
+``` bash
+$ cd ./database_dir/lz4 
+$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
+$ cd ./client_scripts
+$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > lz4.log
+```
+
+IAA deflate:
+``` bash
+$ cd ./database_dir/deflate
+$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
+$ cd ./client_scripts
+$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > deflate.log
+```
+ZSTD:
+``` bash
+$ cd ./database_dir/zstd
+$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
+$ cd ./client_scripts
+$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > zstd.log
+```
+
+Now three logs should be output as expected:
+```text
+lz4.log
+deflate.log
+zstd.log
+```
+
+How to check performance metrics:
+
+We focus on QPS, please search the keyword: `QPS_Final` and collect statistics
+
+## Benchmark with multi-instances
+- To reduce the impact of being memory bound with too many threads, we recommend running the benchmark with multiple instances.
+- Multi-instance means multiple (2 or 4) servers, each connected with its respective client.
+- The cores of one socket need to be divided equally and assigned to the servers respectively.
+- For multiple instances, you must create a new folder for each codec and insert the dataset by following steps similar to those for a single instance.
+
+There are 2 differences:
+- For the client side, you need to launch clickhouse with the assigned port during table creation and data insertion.
+- For the server side, you need to launch clickhouse with the specific xml config file in which the port has been assigned. All customized xml config files for multi-instances have been provided under ./server_config.
+
+Here we assume there are 60 cores per socket and take 2 instances for example.
+Launch server for first instance
+LZ4:
+``` bash
+$ cd ./database_dir/lz4
+$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
+```
+ZSTD:
+``` bash
+$ cd ./database_dir/zstd
+$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
+```
+IAA Deflate:
+``` bash
+$ cd ./database_dir/deflate
+$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
+```
+[Launch server for second instance]
+
+LZ4:
+``` bash
+$ cd ./database_dir && mkdir lz4_s2 && cd lz4_s2
+$ cp ../../server_config/config_lz4_s2.xml ./
+$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null&
+```
+ZSTD:
+``` bash
+$ cd ./database_dir && mkdir zstd_s2 && cd zstd_s2
+$ cp ../../server_config/config_zstd_s2.xml ./
+$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null&
+```
+IAA Deflate:
+``` bash
+$ cd ./database_dir && mkdir deflate_s2 && cd deflate_s2
+$ cp ../../server_config/config_deflate_s2.xml ./
+$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/dev/null&
+```
+
+Creating tables && Inserting data for second instance
+
+Creating tables:
+``` bash
+$ [CLICKHOUSE_EXE] client -m --port=9001 
+```
+Inserting data:
+``` bash
+$ [CLICKHOUSE_EXE] client --query "INSERT INTO [TBL_FILE_NAME] FORMAT CSV" < [TBL_FILE_NAME].tbl  --port=9001
+```
+- [TBL_FILE_NAME] represents the name of a file matching the pattern `*.tbl` under `./benchmark_sample/rawdata_dir/ssb-dbgen`.
+- `--port=9001` stands for the assigned port for server instance which is also defined in config_lz4_s2.xml/config_zstd_s2.xml/config_deflate_s2.xml. For even more instances, you need replace it with the value: 9002/9003 which stand for s3/s4 instance respectively. If you don't assign it, the port is 9000 by default which has been used by first instance.
+
+Benchmarking with 2 instances
+
+LZ4:
+``` bash
+$ cd ./database_dir/lz4
+$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
+$ cd ./database_dir/lz4_s2
+$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null&
+$ cd ./client_scripts
+$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2  > lz4_2insts.log
+```
+ZSTD:
+``` bash
+$ cd ./database_dir/zstd
+$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
+$ cd ./database_dir/zstd_s2
+$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null& 
+$ cd ./client_scripts
+$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > zstd_2insts.log
+```
+IAA deflate
+``` bash
+$ cd ./database_dir/deflate
+$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
+$ cd ./database_dir/deflate_s2
+$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/dev/null&
+$ cd ./client_scripts
+$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > deflate_2insts.log
+```
+Here the last argument `2` of client_stressing_test.py stands for the number of instances. For more instances, you need to replace it with the value 3 or 4. This script supports up to 4 instances.
+
+Now three logs should be output as expected:
+``` text
+lz4_2insts.log
+deflate_2insts.log
+zstd_2insts.log
+```
+How to check performance metrics:
+
+We focus on QPS, please search the keyword: `QPS_Final` and collect statistics
+
+Benchmark setup for 4 instances is similar to that for 2 instances above.
+We recommend using the 2-instance benchmark data as the final report for review.
+
+## Tips
+Each time before launching a new clickhouse server, please make sure no clickhouse process is running in the background; check for and kill any old one:
+``` bash
+$ ps -aux| grep clickhouse
+$ kill -9 [PID]
+```
+By comparing the query list in ./client_scripts/queries_ssb.sql with the official [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema), you will find that 3 queries are not included: Q1.2/Q1.3/Q3.4. This is because CPU utilization is very low (<10%) for these queries, which means they cannot demonstrate performance differences.
--- a/docs/en/development/tests.md
+++ b/docs/en/development/tests.md
@ -71,7 +71,7 @@ SELECT 1
 | `global` | Same as `shard`. Prefer `shard` ||
 | `zookeeper` | Test requires Zookeeper or ClickHouse Keeper to run | Test uses `ReplicatedMergeTree` |
 | `replica` | Same as `zookeeper`. Prefer `zookeeper` ||
-| `no-fasttest`|  Test is not run under [Fast test](continuous-integration#fast-test) | Test uses `MySQL` table engine which is disabled in Fast test|
+| `no-fasttest`|  Test is not run under [Fast test](continuous-integration.md#fast-test) | Test uses `MySQL` table engine which is disabled in Fast test|
 | `no-[asan, tsan, msan, ubsan]` | Disables tests in build with [sanitizers](#sanitizers) | Test is run under QEMU which doesn't work with sanitizers |
 | `no-replicated-database` |||
 | `no-ordinary-database` |||
--- a/docs/en/engines/_category_.yml
+++ b/docs/en/engines/_category_.yml
@ -4,5 +4,4 @@ collapsible: true
 collapsed: true
 link:
  type: generated-index
-  title: Database & Table Engines
  slug: /en/engines
--- a/docs/en/engines/table-engines/integrations/mysql.md
+++ b/docs/en/engines/table-engines/integrations/mysql.md
@ -180,4 +180,4 @@ Default value: `300`.
 ## See Also {#see-also}

 -   [The mysql table function](../../../sql-reference/table-functions/mysql.md)
-   [Using MySQL as a dictionary source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql)
+-   [Using MySQL as a dictionary source](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-mysql)
--- a/docs/en/engines/table-engines/integrations/odbc.md
+++ b/docs/en/engines/table-engines/integrations/odbc.md
@ -126,5 +126,5 @@ SELECT * FROM odbc_t

 ## See Also {#see-also}

-   [ODBC dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-odbc)
+-   [ODBC dictionaries](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-odbc)
 -   [ODBC table function](../../../sql-reference/table-functions/odbc.md)
--- a/docs/en/engines/table-engines/integrations/postgresql.md
+++ b/docs/en/engines/table-engines/integrations/postgresql.md
@ -174,7 +174,7 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32)
 **See Also**

 -   [The `postgresql` table function](../../../sql-reference/table-functions/postgresql.md)
-   [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
+-   [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-postgresql)

 ## Related content
 - Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres)
--- a/docs/en/engines/table-engines/integrations/s3.md
+++ b/docs/en/engines/table-engines/integrations/s3.md
@ -150,6 +150,7 @@ The following settings can be specified in configuration file for given endpoint
 -   `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and [Amazon EC2](https://en.wikipedia.org/wiki/Amazon_Elastic_Compute_Cloud) metadata for given endpoint. Optional, default value is `false`.
 -   `region` — Specifies S3 region name. Optional.
 -   `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Optional, default value is `false`.
+-   `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
 -   `header` —  Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
 -   `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional.
 -   `max_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. Optional.
@ -166,6 +167,7 @@ The following settings can be specified in configuration file for given endpoint
        <!-- <region>us-west-1</region> -->
        <!-- <use_environment_credentials>false</use_environment_credentials> -->
        <!-- <use_insecure_imds_request>false</use_insecure_imds_request> -->
+        <!-- <expiration_window_seconds>120</expiration_window_seconds> -->
        <!-- <header>Authorization: Bearer SOME-TOKEN</header> -->
        <!-- <server_side_encryption_customer_key_base64>BASE64-ENCODED-KEY</server_side_encryption_customer_key_base64> -->
        <!-- <max_single_read_retries>4</max_single_read_retries> -->
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@ -377,8 +377,9 @@ CREATE TABLE table_name
    i32 Int32,
    s String,
    ...
-    INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
-    INDEX b (u64 * length(s)) TYPE set(1000) GRANULARITY 4
+    INDEX idx1 u64 TYPE bloom_filter GRANULARITY 3,
+    INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 3,
+    INDEX idx3 u64 * length(s) TYPE set(1000) GRANULARITY 4
 ) ENGINE = MergeTree()
 ...
 ```
@ -386,8 +387,25 @@ CREATE TABLE table_name
 Indices from the example can be used by ClickHouse to reduce the amount of data to read from disk in the following queries:

 ``` sql
-SELECT count() FROM table WHERE s < 'z'
-SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
+SELECT count() FROM table WHERE u64 == 10;
+SELECT count() FROM table WHERE u64 * i32 >= 1234
+SELECT count() FROM table WHERE u64 * length(s) == 1234
+```
+
+Data skipping indexes can also be created on composite columns:
+
+```sql
+-- on columns of type Map:
+INDEX map_key_index mapKeys(map_column) TYPE bloom_filter
+INDEX map_value_index mapValues(map_column) TYPE bloom_filter
+
+-- on columns of type Tuple:
+INDEX tuple_1_index tuple_column.1 TYPE bloom_filter
+INDEX tuple_2_index tuple_column.2 TYPE bloom_filter
+
+-- on columns of type Nested:
+INDEX nested_1_index col.nested_col1 TYPE bloom_filter
+INDEX nested_2_index col.nested_col2 TYPE bloom_filter
 ```

 ### Available Types of Indices {#available-types-of-indices}
@ -432,20 +450,6 @@ Syntax: `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, ran
 - An experimental index to support approximate nearest neighbor (ANN) search. See [here](annindexes.md) for details.
 - An experimental inverted index to support full-text search. See [here](invertedindexes.md) for details.

-## Example of index creation for Map data type
-
-```
-INDEX map_key_index mapKeys(map_column) TYPE bloom_filter GRANULARITY 1
-INDEX map_key_index mapValues(map_column) TYPE bloom_filter GRANULARITY 1
-```
-
-
-``` sql
-INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4
-INDEX sample_index2 (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4
-INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4
-```
-
 ### Functions Support {#functions-support}

 Conditions in the `WHERE` clause contains calls of the functions that operate with columns. If the column is a part of an index, ClickHouse tries to use this index when performing the functions. ClickHouse supports different subsets of functions for using indexes.
@ -901,7 +905,7 @@ User can assign new big parts to different disks of a [JBOD](https://en.wikipedi
 ## Using S3 for Data Storage {#table_engine-mergetree-s3}

 :::note
-Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/data-ingestion/s3/gcs-merge-tree.md).
+Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
 :::

 `MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
@ -960,6 +964,7 @@ Optional parameters:
 -   `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
 -   `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
 -   `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
+-   `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
 -   `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
 -   `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
 -   `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
--- a/docs/en/engines/table-engines/mergetree-family/replication.md
+++ b/docs/en/engines/table-engines/mergetree-family/replication.md
@ -39,7 +39,7 @@ Compressed data for `INSERT` and `ALTER` queries is replicated (for more informa
 -   The `DROP TABLE` query deletes the replica located on the server where the query is run.
 -   The `RENAME` query renames the table on one of the replicas. In other words, replicated tables can have different names on different replicas.

-ClickHouse uses [ClickHouse Keeper](/docs/en/guides/sre/keeper/clickhouse-keeper.md) for storing replicas meta information. It is possible to use ZooKeeper version 3.4.5 or newer, but ClickHouse Keeper is recommended.
+ClickHouse uses [ClickHouse Keeper](/docs/en/guides/sre/keeper/index.md) for storing replicas meta information. It is possible to use ZooKeeper version 3.4.5 or newer, but ClickHouse Keeper is recommended.

 To use replication, set parameters in the [zookeeper](/docs/en/operations/server-configuration-parameters/settings.md/#server-settings_zookeeper) server configuration section.

@ -144,7 +144,7 @@ ENGINE = ReplicatedReplacingMergeTree
 The `Replicated` prefix is added to the table engine name. For example:`ReplicatedMergeTree`.

 :::tip
-Adding `Replicated` is optional in ClickHouse Cloud, as all of the tables are replicated.  
+Adding `Replicated` is optional in ClickHouse Cloud, as all of the tables are replicated.
 :::

 ### Replicated\*MergeTree parameters
--- a/docs/en/engines/table-engines/special/dictionary.md
+++ b/docs/en/engines/table-engines/special/dictionary.md
@ -6,7 +6,7 @@ sidebar_label: Dictionary

 # Dictionary Table Engine

-The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table.
+The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/index.md) data as a ClickHouse table.

 ## Example {#example}

--- a/docs/en/engines/table-engines/special/distributed.md
+++ b/docs/en/engines/table-engines/special/distributed.md
@ -184,7 +184,7 @@ The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `com

 - `host` – The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server does not start. If you change the DNS record, restart the server.
 - `port` – The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Not to be confused with `http_port`.
- `user` – Name of the user for connecting to a remote server. Default value is the `default` user. This user must have access to connect to the specified server. Access is configured in the `users.xml` file. For more information, see the section [Access rights](../../../operations/access-rights.md).
+- `user` – Name of the user for connecting to a remote server. Default value is the `default` user. This user must have access to connect to the specified server. Access is configured in the `users.xml` file. For more information, see the section [Access rights](../../../guides/sre/user-management/index.md).
 - `password` – The password for connecting to a remote server (not masked). Default value: empty string.
 - `secure` - Whether to use a secure SSL/TLS connection. Usually also requires specifying the port (the default secure port is `9440`). The server should listen on `<tcp_port_secure>9440</tcp_port_secure>` and be configured with correct certificates.
 - `compression` - Use data compression. Default value: `true`.
--- a/docs/en/engines/table-engines/special/generate.md
+++ b/docs/en/engines/table-engines/special/generate.md
@ -15,7 +15,7 @@ Usage examples:
 ## Usage in ClickHouse Server {#usage-in-clickhouse-server}

 ``` sql
-ENGINE = GenerateRandom([random_seed] [,max_string_length] [,max_array_length])
+ENGINE = GenerateRandom([random_seed [,max_string_length [,max_array_length]]])
 ```

 The `max_array_length` and `max_string_length` parameters specify maximum length of all
--- a/docs/en/getting-started/example-datasets/_category_.yml
+++ b/docs/en/getting-started/example-datasets/_category_.yml
@ -1,7 +0,0 @@
-position: 1
-label: 'Example Datasets'
-collapsible: true
-collapsed: true
-link:
-  type: doc
-  id: en/getting-started/example-datasets/
--- a/docs/en/getting-started/example-datasets/cell-towers.md
+++ b/docs/en/getting-started/example-datasets/cell-towers.md
@ -1,9 +1,10 @@
 ---
 slug: /en/getting-started/example-datasets/cell-towers
-sidebar_label: Cell Towers
+sidebar_label: Geo Data
 sidebar_position: 3
-title: "Cell Towers"
+title: "Geo Data using the Cell Tower Dataset"
 ---
+
 import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_http.mdx';

 import Tabs from '@theme/Tabs';
@ -163,7 +164,7 @@ SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10

 Based on the above query and the [MCC list](https://en.wikipedia.org/wiki/Mobile_country_code), the countries with the most cell towers are: the USA, Germany, and Russia.

-You may want to create a [Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values.
+You may want to create a [Dictionary](../../sql-reference/dictionaries/index.md) in ClickHouse to decode these values.

 ## Use case: Incorporate geo data {#use-case}

--- a/docs/en/getting-started/example-datasets/covid19.md
+++ b/docs/en/getting-started/example-datasets/covid19.md
@ -0,0 +1,265 @@
+---
+slug: /en/getting-started/example-datasets/covid19
+sidebar_label: COVID-19 Open-Data
+---
+
+# COVID-19 Open-Data
+
+COVID-19 Open-Data attempts to assemble the largest Covid-19 epidemiological database, in addition to a powerful set of expansive covariates. It includes open, publicly sourced, licensed data relating to demographics, economy, epidemiology, geography, health, hospitalizations, mobility, government response, weather, and more.
+
+The details are in GitHub [here](https://github.com/GoogleCloudPlatform/covid-19-open-data).
+
+It's easy to insert this data into ClickHouse...
+
+:::note
+The following commands were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud). You can easily run them on a local install as well.
+:::
+
+1. Let's see what the data looks like:
+
+```sql
+DESCRIBE url(
+    'https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv',
+    'CSVWithNames'
+);
+```
+
+The CSV file has 10 columns:
+
+```response
+┌─name─────────────────┬─type─────────────┐
+│ date                 │ Nullable(String) │
+│ location_key         │ Nullable(String) │
+│ new_confirmed        │ Nullable(Int64)  │
+│ new_deceased         │ Nullable(Int64)  │
+│ new_recovered        │ Nullable(Int64)  │
+│ new_tested           │ Nullable(Int64)  │
+│ cumulative_confirmed │ Nullable(Int64)  │
+│ cumulative_deceased  │ Nullable(Int64)  │
+│ cumulative_recovered │ Nullable(Int64)  │
+│ cumulative_tested    │ Nullable(Int64)  │
+└──────────────────────┴──────────────────┘
+
+10 rows in set. Elapsed: 0.745 sec.
+```
+
+2. Now let's view some of the rows:
+
+```sql
+SELECT *
+FROM url('https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv')
+LIMIT 100;
+```
+
+Notice the `url` function easily reads data from a CSV file:
+
+```response
+┌─c1─────────┬─c2───────────┬─c3────────────┬─c4───────────┬─c5────────────┬─c6─────────┬─c7───────────────────┬─c8──────────────────┬─c9───────────────────┬─c10───────────────┐
+│ date       │ location_key │ new_confirmed │ new_deceased │ new_recovered │ new_tested │ cumulative_confirmed │ cumulative_deceased │ cumulative_recovered │ cumulative_tested │
+│ 2020-04-03 │ AD           │ 24            │ 1            │ ᴺᵁᴸᴸ          │ ᴺᵁᴸᴸ       │ 466                  │ 17                  │ ᴺᵁᴸᴸ                 │ ᴺᵁᴸᴸ              │
+│ 2020-04-04 │ AD           │ 57            │ 0            │ ᴺᵁᴸᴸ          │ ᴺᵁᴸᴸ       │ 523                  │ 17                  │ ᴺᵁᴸᴸ                 │ ᴺᵁᴸᴸ              │
+│ 2020-04-05 │ AD           │ 17            │ 4            │ ᴺᵁᴸᴸ          │ ᴺᵁᴸᴸ       │ 540                  │ 21                  │ ᴺᵁᴸᴸ                 │ ᴺᵁᴸᴸ              │
+│ 2020-04-06 │ AD           │ 11            │ 1            │ ᴺᵁᴸᴸ          │ ᴺᵁᴸᴸ       │ 551                  │ 22                  │ ᴺᵁᴸᴸ                 │ ᴺᵁᴸᴸ              │
+│ 2020-04-07 │ AD           │ 15            │ 2            │ ᴺᵁᴸᴸ          │ ᴺᵁᴸᴸ       │ 566                  │ 24                  │ ᴺᵁᴸᴸ                 │ ᴺᵁᴸᴸ              │
+│ 2020-04-08 │ AD           │ 23            │ 2            │ ᴺᵁᴸᴸ          │ ᴺᵁᴸᴸ       │ 589                  │ 26                  │ ᴺᵁᴸᴸ                 │ ᴺᵁᴸᴸ              │
+└────────────┴──────────────┴───────────────┴──────────────┴───────────────┴────────────┴──────────────────────┴─────────────────────┴──────────────────────┴───────────────────┘
+```
+
+3. We will create a table now that we know what the data looks like:
+
+```sql
+CREATE TABLE covid19 (
+    date Date,
+    location_key LowCardinality(String),
+    new_confirmed Int32,
+    new_deceased Int32,
+    new_recovered Int32,
+    new_tested Int32,
+    cumulative_confirmed Int32,
+    cumulative_deceased Int32,
+    cumulative_recovered Int32,
+    cumulative_tested Int32
+)
+ENGINE = MergeTree
+ORDER BY (location_key, date);
+```
+
+4. The following command inserts the entire dataset into the `covid19` table:
+
+```sql
+INSERT INTO covid19
+   SELECT *
+   FROM
+      url(
+        'https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv',
+        CSVWithNames,
+        'date Date,
+        location_key LowCardinality(String),
+        new_confirmed Int32,
+        new_deceased Int32,
+        new_recovered Int32,
+        new_tested Int32,
+        cumulative_confirmed Int32,
+        cumulative_deceased Int32,
+        cumulative_recovered Int32,
+        cumulative_tested Int32'
+    );
+```
+
+5. It goes pretty quickly - let's see how many rows were inserted:
+
+```sql
+SELECT formatReadableQuantity(count())
+FROM covid19;
+```
+
+```response
+┌─formatReadableQuantity(count())─┐
+│ 12.53 million                   │
+└─────────────────────────────────┘
+```
+
+6. Let's see how many total cases of Covid-19 were recorded:
+
+```sql
+SELECT formatReadableQuantity(sum(new_confirmed))
+FROM covid19;
+```
+
+```response
+┌─formatReadableQuantity(sum(new_confirmed))─┐
+│ 1.39 billion                               │
+└────────────────────────────────────────────┘
+```
+
+7. You will notice the data has a lot of 0's for some dates - either weekends or days when numbers were not reported. We can use a window function to smooth out the daily averages of new cases:
+
+```sql
+SELECT
+   AVG(new_confirmed) OVER (PARTITION BY location_key ORDER BY date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS cases_smoothed,
+   new_confirmed,
+   location_key,
+   date
+FROM covid19;
+```
+
+8. This query determines the latest values for each location. We can't use `max(date)` because not all countries reported every day, so we grab the last row using `ROW_NUMBER`:
+
+```sql
+WITH latest_deaths_data AS
+   ( SELECT location_key,
+            date,
+            new_deceased,
+            new_confirmed,
+            ROW_NUMBER() OVER (PARTITION BY location_key ORDER BY date DESC) as rn
+     FROM covid19)
+SELECT location_key,
+       date,
+       new_deceased,
+       new_confirmed,
+       rn
+FROM latest_deaths_data
+WHERE rn=1;
+```
+
+9. We can use `lagInFrame` to determine the `LAG` of new cases each day. In this query we filter by the `US_DC` location:
+
+```sql
+SELECT
+   new_confirmed - lagInFrame(new_confirmed,1) OVER (PARTITION BY location_key ORDER BY date) AS confirmed_cases_delta,
+   new_confirmed,
+   location_key,
+   date
+FROM covid19
+WHERE location_key = 'US_DC';
+```
+
+The response looks like:
+
+```response
+┌─confirmed_cases_delta─┬─new_confirmed─┬─location_key─┬───────date─┐
+│                     0 │             0 │ US_DC        │ 2020-03-08 │
+│                     2 │             2 │ US_DC        │ 2020-03-09 │
+│                    -2 │             0 │ US_DC        │ 2020-03-10 │
+│                     6 │             6 │ US_DC        │ 2020-03-11 │
+│                    -6 │             0 │ US_DC        │ 2020-03-12 │
+│                     0 │             0 │ US_DC        │ 2020-03-13 │
+│                     6 │             6 │ US_DC        │ 2020-03-14 │
+│                    -5 │             1 │ US_DC        │ 2020-03-15 │
+│                     4 │             5 │ US_DC        │ 2020-03-16 │
+│                     4 │             9 │ US_DC        │ 2020-03-17 │
+│                    -1 │             8 │ US_DC        │ 2020-03-18 │
+│                    24 │            32 │ US_DC        │ 2020-03-19 │
+│                   -26 │             6 │ US_DC        │ 2020-03-20 │
+│                    15 │            21 │ US_DC        │ 2020-03-21 │
+│                    -3 │            18 │ US_DC        │ 2020-03-22 │
+│                     3 │            21 │ US_DC        │ 2020-03-23 │
+```
+
+10. This query calculates the percentage of change in new cases each day, and includes a simple `increase` or `decrease` column in the result set:
+
+```sql
+WITH confirmed_lag AS (
+  SELECT
+    *,
+    lagInFrame(new_confirmed) OVER(
+      PARTITION BY location_key
+      ORDER BY date
+    ) AS confirmed_previous_day
+  FROM covid19
+),
+confirmed_percent_change AS (
+  SELECT
+    *,
+    COALESCE(ROUND((new_confirmed - confirmed_previous_day) / confirmed_previous_day * 100), 0) AS percent_change
+  FROM confirmed_lag
+)
+SELECT
+  date,
+  new_confirmed,
+  percent_change,
+  CASE
+    WHEN percent_change > 0 THEN 'increase'
+    WHEN percent_change = 0 THEN 'no change'
+    ELSE 'decrease'
+  END AS trend
+FROM confirmed_percent_change
+WHERE location_key = 'US_DC';
+```
+
+The results look like:
+
+```response
+┌───────date─┬─new_confirmed─┬─percent_change─┬─trend─────┐
+│ 2020-03-08 │             0 │            nan │ decrease  │
+│ 2020-03-09 │             2 │            inf │ increase  │
+│ 2020-03-10 │             0 │           -100 │ decrease  │
+│ 2020-03-11 │             6 │            inf │ increase  │
+│ 2020-03-12 │             0 │           -100 │ decrease  │
+│ 2020-03-13 │             0 │            nan │ decrease  │
+│ 2020-03-14 │             6 │            inf │ increase  │
+│ 2020-03-15 │             1 │            -83 │ decrease  │
+│ 2020-03-16 │             5 │            400 │ increase  │
+│ 2020-03-17 │             9 │             80 │ increase  │
+│ 2020-03-18 │             8 │            -11 │ decrease  │
+│ 2020-03-19 │            32 │            300 │ increase  │
+│ 2020-03-20 │             6 │            -81 │ decrease  │
+│ 2020-03-21 │            21 │            250 │ increase  │
+│ 2020-03-22 │            18 │            -14 │ decrease  │
+│ 2020-03-23 │            21 │             17 │ increase  │
+│ 2020-03-24 │            46 │            119 │ increase  │
+│ 2020-03-25 │            48 │              4 │ increase  │
+│ 2020-03-26 │            36 │            -25 │ decrease  │
+│ 2020-03-27 │            37 │              3 │ increase  │
+│ 2020-03-28 │            38 │              3 │ increase  │
+│ 2020-03-29 │            59 │             55 │ increase  │
+│ 2020-03-30 │            94 │             59 │ increase  │
+│ 2020-03-31 │            91 │             -3 │ decrease  │
+│ 2020-04-01 │            67 │            -26 │ decrease  │
+│ 2020-04-02 │           104 │             55 │ increase  │
+│ 2020-04-03 │           145 │             39 │ increase  │
+```
+
+:::note
+As mentioned in the [GitHub repo](https://github.com/GoogleCloudPlatform/covid-19-open-data), the dataset is no longer updated as of September 15, 2022.
+:::
--- a/docs/en/getting-started/example-datasets/criteo.md
+++ b/docs/en/getting-started/example-datasets/criteo.md
@ -3,14 +3,56 @@ slug: /en/getting-started/example-datasets/criteo
 sidebar_label: Terabyte Click Logs from Criteo
 ---

-# Terabyte of Click Logs from Criteo 
+# Terabyte of Click Logs from Criteo

 Download the data from http://labs.criteo.com/downloads/download-terabyte-click-logs/

 Create a table to import the log to:

 ``` sql
-CREATE TABLE criteo_log (date Date, clicked UInt8, int1 Int32, int2 Int32, int3 Int32, int4 Int32, int5 Int32, int6 Int32, int7 Int32, int8 Int32, int9 Int32, int10 Int32, int11 Int32, int12 Int32, int13 Int32, cat1 String, cat2 String, cat3 String, cat4 String, cat5 String, cat6 String, cat7 String, cat8 String, cat9 String, cat10 String, cat11 String, cat12 String, cat13 String, cat14 String, cat15 String, cat16 String, cat17 String, cat18 String, cat19 String, cat20 String, cat21 String, cat22 String, cat23 String, cat24 String, cat25 String, cat26 String) ENGINE = Log
+CREATE TABLE criteo_log (
+    date Date,
+    clicked UInt8,
+    int1 Int32,
+    int2 Int32,
+    int3 Int32,
+    int4 Int32,
+    int5 Int32,
+    int6 Int32,
+    int7 Int32,
+    int8 Int32,
+    int9 Int32,
+    int10 Int32,
+    int11 Int32,
+    int12 Int32,
+    int13 Int32,
+    cat1 String,
+    cat2 String,
+    cat3 String,
+    cat4 String,
+    cat5 String,
+    cat6 String,
+    cat7 String,
+    cat8 String,
+    cat9 String,
+    cat10 String,
+    cat11 String,
+    cat12 String,
+    cat13 String,
+    cat14 String,
+    cat15 String,
+    cat16 String,
+    cat17 String,
+    cat18 String,
+    cat19 String,
+    cat20 String,
+    cat21 String,
+    cat22 String,
+    cat23 String,
+    cat24 String,
+    cat25 String,
+    cat26 String
+) ENGINE = Log;
 ```

 Download the data:
@ -73,7 +115,52 @@ ORDER BY (date, icat1)
 Transform data from the raw log and put it in the second table:

 ``` sql
-INSERT INTO criteo SELECT date, clicked, int1, int2, int3, int4, int5, int6, int7, int8, int9, int10, int11, int12, int13, reinterpretAsUInt32(unhex(cat1)) AS icat1, reinterpretAsUInt32(unhex(cat2)) AS icat2, reinterpretAsUInt32(unhex(cat3)) AS icat3, reinterpretAsUInt32(unhex(cat4)) AS icat4, reinterpretAsUInt32(unhex(cat5)) AS icat5, reinterpretAsUInt32(unhex(cat6)) AS icat6, reinterpretAsUInt32(unhex(cat7)) AS icat7, reinterpretAsUInt32(unhex(cat8)) AS icat8, reinterpretAsUInt32(unhex(cat9)) AS icat9, reinterpretAsUInt32(unhex(cat10)) AS icat10, reinterpretAsUInt32(unhex(cat11)) AS icat11, reinterpretAsUInt32(unhex(cat12)) AS icat12, reinterpretAsUInt32(unhex(cat13)) AS icat13, reinterpretAsUInt32(unhex(cat14)) AS icat14, reinterpretAsUInt32(unhex(cat15)) AS icat15, reinterpretAsUInt32(unhex(cat16)) AS icat16, reinterpretAsUInt32(unhex(cat17)) AS icat17, reinterpretAsUInt32(unhex(cat18)) AS icat18, reinterpretAsUInt32(unhex(cat19)) AS icat19, reinterpretAsUInt32(unhex(cat20)) AS icat20, reinterpretAsUInt32(unhex(cat21)) AS icat21, reinterpretAsUInt32(unhex(cat22)) AS icat22, reinterpretAsUInt32(unhex(cat23)) AS icat23, reinterpretAsUInt32(unhex(cat24)) AS icat24, reinterpretAsUInt32(unhex(cat25)) AS icat25, reinterpretAsUInt32(unhex(cat26)) AS icat26 FROM criteo_log;
+INSERT INTO
+    criteo
+SELECT
+    date,
+    clicked,
+    int1,
+    int2,
+    int3,
+    int4,
+    int5,
+    int6,
+    int7,
+    int8,
+    int9,
+    int10,
+    int11,
+    int12,
+    int13,
+    reinterpretAsUInt32(unhex(cat1)) AS icat1,
+    reinterpretAsUInt32(unhex(cat2)) AS icat2,
+    reinterpretAsUInt32(unhex(cat3)) AS icat3,
+    reinterpretAsUInt32(unhex(cat4)) AS icat4,
+    reinterpretAsUInt32(unhex(cat5)) AS icat5,
+    reinterpretAsUInt32(unhex(cat6)) AS icat6,
+    reinterpretAsUInt32(unhex(cat7)) AS icat7,
+    reinterpretAsUInt32(unhex(cat8)) AS icat8,
+    reinterpretAsUInt32(unhex(cat9)) AS icat9,
+    reinterpretAsUInt32(unhex(cat10)) AS icat10,
+    reinterpretAsUInt32(unhex(cat11)) AS icat11,
+    reinterpretAsUInt32(unhex(cat12)) AS icat12,
+    reinterpretAsUInt32(unhex(cat13)) AS icat13,
+    reinterpretAsUInt32(unhex(cat14)) AS icat14,
+    reinterpretAsUInt32(unhex(cat15)) AS icat15,
+    reinterpretAsUInt32(unhex(cat16)) AS icat16,
+    reinterpretAsUInt32(unhex(cat17)) AS icat17,
+    reinterpretAsUInt32(unhex(cat18)) AS icat18,
+    reinterpretAsUInt32(unhex(cat19)) AS icat19,
+    reinterpretAsUInt32(unhex(cat20)) AS icat20,
+    reinterpretAsUInt32(unhex(cat21)) AS icat21,
+    reinterpretAsUInt32(unhex(cat22)) AS icat22,
+    reinterpretAsUInt32(unhex(cat23)) AS icat23,
+    reinterpretAsUInt32(unhex(cat24)) AS icat24,
+    reinterpretAsUInt32(unhex(cat25)) AS icat25,
+    reinterpretAsUInt32(unhex(cat26)) AS icat26
+FROM
+    criteo_log;

 DROP TABLE criteo_log;
 ```
--- a/docs/en/getting-started/example-datasets/github.md
+++ b/docs/en/getting-started/example-datasets/github.md
@ -1,12 +1,13 @@
 ---
 slug: /en/getting-started/example-datasets/github
-sidebar_label: GitHub Repo Analysis
+sidebar_label: GitHub Repo
+sidebar_position: 1
 description: Analyze the ClickHouse GitHub repo or any repository of your choosing
 ---

-# ClickHouse GitHub data
+# Writing Queries in ClickHouse using GitHub Data

-This dataset contains all of the commits and changes for the ClickHouse repository. It can be generated using the native `git-import` tool distributed with ClickHouse. 
+This dataset contains all of the commits and changes for the ClickHouse repository. It can be generated using the native `git-import` tool distributed with ClickHouse.

 The generated data provides a `tsv` file for each of the following tables:

@ -323,7 +324,7 @@ Note a more complex variant of this query exists where we find the [line-by-line

 ## Find the current active files

-This is important for later analysis when we only want to consider the current files in the repository. We estimate this set as the files which haven't been renamed or deleted (and then re-added/re-named). 
+This is important for later analysis when we only want to consider the current files in the repository. We estimate this set as the files which haven't been renamed or deleted (and then re-added/re-named).

 **Note there appears to have been a broken commit history in relation to files under the `dbms`, `libs`, `tests/testflows/` directories during their renames. We also thus exclude these.**

@ -417,7 +418,7 @@ git ls-files | grep -v -E 'generated\.cpp|^(contrib|docs?|website|libs/(libcityh

 The difference here is caused by a few factors:

- A rename can occur alongside other modifications to the file. These are listed as separate events in file_changes but with the same time. The `argMax` function has no way of distinguishing these - it picks the first value. The natural ordering of the inserts (the only means of knowing the correct order) is not maintained across the union so modified events can be selected. For example, below the `src/Functions/geometryFromColumn.h` file has several modifications before being renamed to `src/Functions/geometryConverters.h`. Our current solution may pick a Modify event as the latest change causing `src/Functions/geometryFromColumn.h` to be retained. 
+- A rename can occur alongside other modifications to the file. These are listed as separate events in file_changes but with the same time. The `argMax` function has no way of distinguishing these - it picks the first value. The natural ordering of the inserts (the only means of knowing the correct order) is not maintained across the union so modified events can be selected. For example, below the `src/Functions/geometryFromColumn.h` file has several modifications before being renamed to `src/Functions/geometryConverters.h`. Our current solution may pick a Modify event as the latest change causing `src/Functions/geometryFromColumn.h` to be retained.

 [play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICAgIGNoYW5nZV90eXBlLAogICAgICBwYXRoLAogICAgICBvbGRfcGF0aCwKICAgICAgdGltZSwKICAgICAgY29tbWl0X2hhc2gKICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogIFdIRVJFIChwYXRoID0gJ3NyYy9GdW5jdGlvbnMvZ2VvbWV0cnlGcm9tQ29sdW1uLmgnKSBPUiAob2xkX3BhdGggPSAnc3JjL0Z1bmN0aW9ucy9nZW9tZXRyeUZyb21Db2x1bW4uaCcpCg==)

@ -1386,7 +1387,7 @@ LIMIT 1 BY day_of_week
 7 rows in set. Elapsed: 0.004 sec. Processed 21.82 thousand rows, 140.02 KB (4.88 million rows/s., 31.29 MB/s.)
 ```

-This is still a little simple and doesn't reflect people's work. 
+This is still a little simple and doesn't reflect people's work.

 A better metric might be who is the top contributor each day as a fraction of the total work performed in the last year. Note that we treat the deletion and adding code equally.

@ -1952,7 +1953,7 @@ SELECT

 Most contributors write more code than tests, as you'd expect.

-What about who adds the most comments when contributing code? 
+What about who adds the most comments when contributing code?

 [play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhdXRob3IsCiAgICBhdmcocmF0aW9fY29tbWVudHMpIEFTIGF2Z19yYXRpb19jb21tZW50cywKICAgIHN1bShjb2RlKSBBUyBjb2RlCkZST00KKAogICAgU0VMRUNUCiAgICAgICAgYXV0aG9yLAogICAgICAgIGNvbW1pdF9oYXNoLAogICAgICAgIGNvdW50SWYobGluZV90eXBlID0gJ0NvbW1lbnQnKSBBUyBjb21tZW50cywKICAgICAgICBjb3VudElmKGxpbmVfdHlwZSA9ICdDb2RlJykgQVMgY29kZSwKICAgICAgICBpZihjb21tZW50cyA+IDAsIGNvbW1lbnRzIC8gKGNvbW1lbnRzICsgY29kZSksIDApIEFTIHJhdGlvX2NvbW1lbnRzCiAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmxpbmVfY2hhbmdlcwogICAgR1JPVVAgQlkKICAgICAgICBhdXRob3IsCiAgICAgICAgY29tbWl0X2hhc2gKKQpHUk9VUCBCWSBhdXRob3IKT1JERVIgQlkgY29kZSBERVNDCkxJTUlUIDEwCg==)

@ -2393,7 +2394,7 @@ WHERE (path = 'src/Storages/StorageReplicatedMergeTree.cpp') AND (change_type =

 This makes viewing the full history of a file challenging since we don't have a single value connecting all line or file changes.

-To address this, we can use User Defined Functions (UDFs). These cannot, currently, be recursive, so to identify the history of a file we must define a series of UDFs which call each other explicitly. 
+To address this, we can use User Defined Functions (UDFs). These cannot, currently, be recursive, so to identify the history of a file we must define a series of UDFs which call each other explicitly.

 This means we can only track renames to a maximum depth - the below example is 5 deep. It is unlikely a file will be renamed more times than this, so for now, this is sufficient.

--- a/docs/en/getting-started/example-datasets/metrica.md
+++ b/docs/en/getting-started/example-datasets/metrica.md
@ -84,7 +84,7 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
 1680609
 ```

-## An example JOIN 
+## An example JOIN

 The hits and visits dataset is used in the ClickHouse test
 routines, this is one of the queries from the test suite. The rest
@ -131,10 +131,10 @@ FORMAT PrettyCompact"

 ## Next Steps

-[A Practical Introduction to Sparse Primary Indexes in ClickHouse](../../guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-intro.md) uses the hits dataset to discuss the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices.
+[A Practical Introduction to Sparse Primary Indexes in ClickHouse](/docs/en/guides/best-practices/sparse-primary-indexes.md) uses the hits dataset to discuss the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices.

 Additional examples of queries to these tables can be found among the ClickHouse [stateful tests](https://github.com/ClickHouse/ClickHouse/blob/d7129855757f38ceec3e4ecc6dafacdabe9b178f/tests/queries/1_stateful/00172_parallel_join.sql).

 :::note
-The test suite uses a database name `test`, and the tables are named `hits` and `visits`.  You can rename your database and tables, or edit the SQL from the test file.  
+The test suite uses a database name `test`, and the tables are named `hits` and `visits`.  You can rename your database and tables, or edit the SQL from the test file.
 :::
--- a/docs/en/getting-started/example-datasets/nypd_complaint_data.md
+++ b/docs/en/getting-started/example-datasets/nypd_complaint_data.md
@ -16,7 +16,7 @@ While working through this guide you will:

 The dataset used in this guide comes from the NYC Open Data team, and contains data about "all valid felony, misdemeanor, and violation crimes reported to the New York City Police Department (NYPD)". At the time of writing, the data file is 166MB, but it is updated regularly.

-**Source**: [data.cityofnewyork.us](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243)  
+**Source**: [data.cityofnewyork.us](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243)
 **Terms of use**: https://www1.nyc.gov/home/terms-of-use.page

 ## Prerequisites
@ -35,7 +35,7 @@ The examples in this guide assume that you have saved the TSV file to `${HOME}/N

 ## Familiarize yourself with the TSV file

-Before starting to work with the ClickHouse database familiarize yourself with the data. 
+Before starting to work with the ClickHouse database familiarize yourself with the data.

 ### Look at the fields in the source TSV file

@ -47,15 +47,15 @@ clickhouse-local --query \

 Sample response
 ```response
-CMPLNT_NUM                  Nullable(Float64)					
-ADDR_PCT_CD                 Nullable(Float64)					
-BORO_NM                     Nullable(String)					
-CMPLNT_FR_DT                Nullable(String)					
-CMPLNT_FR_TM                Nullable(String)					
+CMPLNT_NUM                  Nullable(Float64)
+ADDR_PCT_CD                 Nullable(Float64)
+BORO_NM                     Nullable(String)
+CMPLNT_FR_DT                Nullable(String)
+CMPLNT_FR_TM                Nullable(String)
 ```

 :::tip
-Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples.  This is not always the case.  Because ClickHouse is routineley used with datasets containing billions of records there is a default number (100) of rows examined to [infer the schema](../../guides/developer/working-with-json/json-semi-structured.md/#relying-on-schema-inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric.  By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000`
+Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples.  This is not always the case.  Because ClickHouse is routinely used with datasets containing billions of records there is a default number (100) of rows examined to [infer the schema](/docs/en/integrations/data-ingestion/data-formats/json.md#relying-on-schema-inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric.  By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000`
 you can get a better idea of the content.

 Note: as of version 22.5 the default is now 25,000 rows for inferring the schema, so only change the setting if you are on an older version or if you need more than 25,000 rows to be sampled.
@ -65,46 +65,46 @@ Run this command at your command prompt.  You will be using `clickhouse-local` t
 ```sh
 clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
 --query \
-"describe file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')" 
+"describe file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')"
 ```

 Result:
 ```response
-CMPLNT_NUM        Nullable(String)					
-ADDR_PCT_CD       Nullable(Float64)					
-BORO_NM           Nullable(String)					
-CMPLNT_FR_DT      Nullable(String)					
-CMPLNT_FR_TM      Nullable(String)					
-CMPLNT_TO_DT      Nullable(String)					
-CMPLNT_TO_TM      Nullable(String)					
-CRM_ATPT_CPTD_CD  Nullable(String)					
-HADEVELOPT        Nullable(String)					
-HOUSING_PSA       Nullable(Float64)					
-JURISDICTION_CODE Nullable(Float64)					
-JURIS_DESC        Nullable(String)					
-KY_CD             Nullable(Float64)					
-LAW_CAT_CD        Nullable(String)					
-LOC_OF_OCCUR_DESC Nullable(String)					
-OFNS_DESC         Nullable(String)					
-PARKS_NM          Nullable(String)					
-PATROL_BORO       Nullable(String)					
-PD_CD             Nullable(Float64)					
-PD_DESC           Nullable(String)					
-PREM_TYP_DESC     Nullable(String)					
-RPT_DT            Nullable(String)					
-STATION_NAME      Nullable(String)					
-SUSP_AGE_GROUP    Nullable(String)					
-SUSP_RACE         Nullable(String)					
-SUSP_SEX          Nullable(String)					
-TRANSIT_DISTRICT  Nullable(Float64)					
-VIC_AGE_GROUP     Nullable(String)					
-VIC_RACE          Nullable(String)					
-VIC_SEX           Nullable(String)					
-X_COORD_CD        Nullable(Float64)					
-Y_COORD_CD        Nullable(Float64)					
-Latitude          Nullable(Float64)					
-Longitude         Nullable(Float64)					
-Lat_Lon           Tuple(Nullable(Float64), Nullable(Float64))					
+CMPLNT_NUM        Nullable(String)
+ADDR_PCT_CD       Nullable(Float64)
+BORO_NM           Nullable(String)
+CMPLNT_FR_DT      Nullable(String)
+CMPLNT_FR_TM      Nullable(String)
+CMPLNT_TO_DT      Nullable(String)
+CMPLNT_TO_TM      Nullable(String)
+CRM_ATPT_CPTD_CD  Nullable(String)
+HADEVELOPT        Nullable(String)
+HOUSING_PSA       Nullable(Float64)
+JURISDICTION_CODE Nullable(Float64)
+JURIS_DESC        Nullable(String)
+KY_CD             Nullable(Float64)
+LAW_CAT_CD        Nullable(String)
+LOC_OF_OCCUR_DESC Nullable(String)
+OFNS_DESC         Nullable(String)
+PARKS_NM          Nullable(String)
+PATROL_BORO       Nullable(String)
+PD_CD             Nullable(Float64)
+PD_DESC           Nullable(String)
+PREM_TYP_DESC     Nullable(String)
+RPT_DT            Nullable(String)
+STATION_NAME      Nullable(String)
+SUSP_AGE_GROUP    Nullable(String)
+SUSP_RACE         Nullable(String)
+SUSP_SEX          Nullable(String)
+TRANSIT_DISTRICT  Nullable(Float64)
+VIC_AGE_GROUP     Nullable(String)
+VIC_RACE          Nullable(String)
+VIC_SEX           Nullable(String)
+X_COORD_CD        Nullable(Float64)
+Y_COORD_CD        Nullable(Float64)
+Latitude          Nullable(Float64)
+Longitude         Nullable(Float64)
+Lat_Lon           Tuple(Nullable(Float64), Nullable(Float64))
 New Georeferenced Column Nullable(String)
 ```

@ -362,7 +362,7 @@ The dates shown as `1925` above are from errors in the data.  There are several

 The decisions made above on the data types used for the columns are reflected in the table schema
 below. We also need to decide on the `ORDER BY` and `PRIMARY KEY` used for the table.  At least one
-of `ORDER BY` or `PRIMARY KEY` must be specified.  Here are some guidelines on deciding on the 
+of `ORDER BY` or `PRIMARY KEY` must be specified.  Here are some guidelines on deciding on the
 columns to includes in `ORDER BY`, and more information is in the *Next Steps* section at the end
 of this document.

@ -420,7 +420,7 @@ ORDER BY ( borough, offense_description, date_reported )
 Putting together the changes to data types and the `ORDER BY` tuple gives this table structure:

 ```sql
-CREATE TABLE NYPD_Complaint ( 
+CREATE TABLE NYPD_Complaint (
    complaint_number     String,
    precinct             UInt8,
    borough              LowCardinality(String),
@ -429,7 +429,7 @@ CREATE TABLE NYPD_Complaint (
    was_crime_completed  String,
    housing_authority    String,
    housing_level_code   UInt32,
-    jurisdiction_code    UInt8, 
+    jurisdiction_code    UInt8,
    jurisdiction         LowCardinality(String),
    offense_code         UInt8,
    offense_level        LowCardinality(String),
@ -478,7 +478,7 @@ Query id: 6a5b10bf-9333-4090-b36e-c7f08b1d9e01

 Row 1:
 ──────
-partition_key: 
+partition_key:
 sorting_key:   borough, offense_description, date_reported
 primary_key:   borough, offense_description, date_reported
 table:         NYPD_Complaint
@ -495,7 +495,7 @@ We will use `clickhouse-local` tool for data preprocessing and `clickhouse-clien
 :::tip
 `table='input'` appears in the arguments to clickhouse-local below.  clickhouse-local takes the provided input (`cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv`) and inserts the input into a table.  By default the table is named `table`.  In this guide the name of the table is set to `input` to make the data flow clearer. The final argument to clickhouse-local is a query that selects from the table (`FROM input`) which is then piped to `clickhouse-client` to populate the table `NYPD_Complaint`.
 :::
-  
+
 ```sql
 cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \
  | clickhouse-local --table='input' --input-format='TSVWithNames' \
@ -512,12 +512,12 @@ cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \
      CRM_ATPT_CPTD_CD                            AS was_crime_completed,
      HADEVELOPT                                  AS housing_authority_development,
      HOUSING_PSA                                 AS housing_level_code,
-      JURISDICTION_CODE                           AS jurisdiction_code, 
+      JURISDICTION_CODE                           AS jurisdiction_code,
      JURIS_DESC                                  AS jurisdiction,
      KY_CD                                       AS offense_code,
      LAW_CAT_CD                                  AS offense_level,
      LOC_OF_OCCUR_DESC                           AS location_descriptor,
-      OFNS_DESC                                   AS offense_description, 
+      OFNS_DESC                                   AS offense_description,
      PARKS_NM                                    AS park_name,
      PATROL_BORO                                 AS patrol_borough,
      PD_CD,
@ -529,7 +529,7 @@ cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \
      SUSP_RACE                                   AS suspect_race,
      SUSP_SEX                                    AS suspect_sex,
      TRANSIT_DISTRICT                            AS transit_district,
-      VIC_AGE_GROUP                               AS victim_age_group,   
+      VIC_AGE_GROUP                               AS victim_age_group,
      VIC_RACE                                    AS victim_race,
      VIC_SEX                                     AS victim_sex,
      X_COORD_CD                                  AS NY_x_coordinate,
@ -538,7 +538,7 @@ cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \
      Longitude
    FROM input" \
  | clickhouse-client --query='INSERT INTO NYPD_Complaint FORMAT TSV'
-```  
+```

 ## Validate the Data {#validate-data}

@ -560,7 +560,7 @@ Result:
 │  208993 │
 └─────────┘

-1 row in set. Elapsed: 0.001 sec. 
+1 row in set. Elapsed: 0.001 sec.
 ```

 The size of the dataset in ClickHouse is just 12% of the original TSV file, compare the size of the original TSV file with the size of the table:
@ -651,4 +651,4 @@ Query id: 8cdcdfd4-908f-4be0-99e3-265722a2ab8d

 ## Next Steps

-[A Practical Introduction to Sparse Primary Indexes in ClickHouse](../../guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-intro.md) discusses the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices.
+[A Practical Introduction to Sparse Primary Indexes in ClickHouse](/docs/en/guides/best-practices/sparse-primary-indexes.md) discusses the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices.
--- a/docs/en/getting-started/example-datasets/recipes.md
+++ b/docs/en/getting-started/example-datasets/recipes.md
@ -80,7 +80,7 @@ Result:

 ### Top Components by the Number of Recipes:

-In this example we learn how to use [arrayJoin](../../sql-reference/functions/array-join/) function to expand an array into a set of rows.
+In this example we learn how to use [arrayJoin](../../sql-reference/functions/array-join.md) function to expand an array into a set of rows.

 Query:

@ -185,7 +185,7 @@ Result:
 10 rows in set. Elapsed: 0.215 sec. Processed 2.23 million rows, 1.48 GB (10.35 million rows/s., 6.86 GB/s.)
 ```

-In this example, we involve [has](../../sql-reference/functions/array-functions/#hasarr-elem) function to filter by array elements and sort by the number of directions.
+In this example, we involve [has](../../sql-reference/functions/array-functions.md#hasarr-elem) function to filter by array elements and sort by the number of directions.

 There is a wedding cake that requires the whole 126 steps to produce! Show that directions:

--- a/docs/en/getting-started/example-datasets/uk-price-paid.md
+++ b/docs/en/getting-started/example-datasets/uk-price-paid.md
@ -1,17 +1,17 @@
 ---
 slug: /en/getting-started/example-datasets/uk-price-paid
-sidebar_label: UK Property Price Paid
+sidebar_label: UK Property Prices
 sidebar_position: 1
-title: "UK Property Price Paid"
 ---

-The dataset contains data about prices paid for real-estate property in England and Wales. The data is available since year 1995.
-The size of the dataset in uncompressed form is about 4 GiB and it will take about 278 MiB in ClickHouse.
+# The UK property prices dataset

-Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
-Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
+Projections are a great way to improve the performance of queries that you run frequently. We will demonstrate the power of projections
+using the UK property dataset, which contains data about prices paid for real-estate property in England and Wales. The data is available since 1995, and the size of the dataset in uncompressed form is about 4 GiB (which will only take about 278 MiB in ClickHouse).

-Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
+- Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
+- Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
+- Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.

 ## Create the Table {#create-table}

--- a/docs/en/getting-started/example-datasets/youtube-dislikes.md
+++ b/docs/en/getting-started/example-datasets/youtube-dislikes.md
@ -0,0 +1,219 @@
+---
+slug: /en/getting-started/example-datasets/youtube-dislikes
+sidebar_label: YouTube Dislikes
+description: A collection of dislikes of YouTube videos.
+---
+
+# YouTube dataset of dislikes
+
+In November of 2021, YouTube removed the public ***dislike*** count from all of its videos. While creators can still see the number of dislikes, viewers can only see how many ***likes*** a video has received.
+
+:::important
+The dataset has over 4.55 billion records, so be careful just copying-and-pasting the commands below unless your resources can handle that type of volume. The commands below were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud).
+:::
+
+The data is in a JSON format and can be downloaded from [archive.org](https://archive.org/download/dislikes_youtube_2021_12_video_json_files). We have made this same data available in S3 so that it can be downloaded more efficiently into a ClickHouse Cloud instance.
+
+Here are the steps to create a table in ClickHouse Cloud and insert the data.
+
+:::note
+The steps below will easily work on a local install of ClickHouse too. The only change would be to use the `s3` function instead of `s3Cluster` (unless you have a cluster configured - in which case change `default` to the name of your cluster).
+:::
+
+## Step-by-step instructions
+
+1. Let's see what the data looks like. The `s3Cluster` table function returns a table, so we can `DESCRIBE` the result:
+
+```sql
+DESCRIBE s3Cluster(
+    'default',
+    'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst',
+    'JSONLines'
+);
+```
+
+ClickHouse infers the following schema from the JSON file:
+
+```response
+┌─name────────────────┬─type─────────────────────────────────┐
+│ id                  │ Nullable(String)                     │
+│ fetch_date          │ Nullable(Int64)                      │
+│ upload_date         │ Nullable(String)                     │
+│ title               │ Nullable(String)                     │
+│ uploader_id         │ Nullable(String)                     │
+│ uploader            │ Nullable(String)                     │
+│ uploader_sub_count  │ Nullable(Int64)                      │
+│ is_age_limit        │ Nullable(Bool)                       │
+│ view_count          │ Nullable(Int64)                      │
+│ like_count          │ Nullable(Int64)                      │
+│ dislike_count       │ Nullable(Int64)                      │
+│ is_crawlable        │ Nullable(Bool)                       │
+│ is_live_content     │ Nullable(Bool)                       │
+│ has_subtitles       │ Nullable(Bool)                       │
+│ is_ads_enabled      │ Nullable(Bool)                       │
+│ is_comments_enabled │ Nullable(Bool)                       │
+│ description         │ Nullable(String)                     │
+│ rich_metadata       │ Array(Map(String, Nullable(String))) │
+│ super_titles        │ Array(Map(String, Nullable(String))) │
+│ uploader_badges     │ Nullable(String)                     │
+│ video_badges        │ Nullable(String)                     │
+└─────────────────────┴──────────────────────────────────────┘
+```
+
+2. Based on the inferred schema, we cleaned up the data types and added a primary key. Define the following table:
+
+```sql
+CREATE TABLE youtube
+(
+    `id` String,
+    `fetch_date` DateTime,
+    `upload_date` String,
+    `title` String,
+    `uploader_id` String,
+    `uploader` String,
+    `uploader_sub_count` Int64,
+    `is_age_limit` Bool,
+    `view_count` Int64,
+    `like_count` Int64,
+    `dislike_count` Int64,
+    `is_crawlable` Bool,
+    `has_subtitles` Bool,
+    `is_ads_enabled` Bool,
+    `is_comments_enabled` Bool,
+    `description` String,
+    `rich_metadata` Array(Map(String, String)),
+    `super_titles` Array(Map(String, String)),
+    `uploader_badges` String,
+    `video_badges` String
+)
+ENGINE = MergeTree
+ORDER BY (uploader, upload_date);
+```
+
+3. The following command streams the records from the S3 files into the `youtube` table.
+
+:::important
+This inserts a lot of data - 4.56 billion rows. If you do not want the entire dataset, simply add a `LIMIT` clause with the desired number of rows.
+:::
+
+```sql
+INSERT INTO youtube
+SETTINGS input_format_null_as_default = 1
+SELECT
+    id,
+    parseDateTimeBestEffortUS(toString(fetch_date)) AS fetch_date,
+    upload_date,
+    ifNull(title, '') AS title,
+    uploader_id,
+    ifNull(uploader, '') AS uploader,
+    uploader_sub_count,
+    is_age_limit,
+    view_count,
+    like_count,
+    dislike_count,
+    is_crawlable,
+    has_subtitles,
+    is_ads_enabled,
+    is_comments_enabled,
+    ifNull(description, '') AS description,
+    rich_metadata,
+    super_titles,
+    ifNull(uploader_badges, '') AS uploader_badges,
+    ifNull(video_badges, '') AS video_badges
+FROM s3Cluster(
+       'default',
+       'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst',
+       'JSONLines'
+    );
+```
+
+4. Open a new tab in the SQL Console of ClickHouse Cloud (or a new `clickhouse-client` window) and watch the count increase. It will take a while to insert 4.56B rows, depending on your server resources. (Without any tweaking of settings, it takes about 4.5 hours.)
+
+```sql
+SELECT formatReadableQuantity(count())
+FROM youtube
+```
+
+```response
+┌─formatReadableQuantity(count())─┐
+│ 4.56 billion                    │
+└─────────────────────────────────┘
+```
+
+5. Once the data is inserted, go ahead and count the number of dislikes of your favorite videos or channels. Let's see how many videos were uploaded by ClickHouse:
+
+```sql
+SELECT count()
+FROM youtube
+WHERE uploader = 'ClickHouse';
+```
+
+```response
+┌─count()─┐
+│      84 │
+└─────────┘
+
+1 row in set. Elapsed: 0.570 sec. Processed 237.57 thousand rows, 5.77 MB (416.54 thousand rows/s., 10.12 MB/s.)
+```
+
+:::note
+The query above runs so quickly because we chose `uploader` as the first column of the primary key - so it only had to process 237k rows.
+:::
+
+6. Let's look at likes and dislikes of ClickHouse videos:
+
+```sql
+SELECT
+    title,
+    like_count,
+    dislike_count
+FROM youtube
+WHERE uploader = 'ClickHouse'
+ORDER BY dislike_count DESC;
+```
+
+The response looks like:
+
+```response
+┌─title────────────────────────────────────────────────────────────────────────────────────────────────┬─like_count─┬─dislike_count─┐
+│ ClickHouse v21.11 Release Webinar                                                                    │         52 │             3 │
+│ ClickHouse Introduction                                                                              │         97 │             3 │
+│ Casa Modelo Algarve                                                                                  │        180 │             3 │
+│ Профайлер запросов:  трудный путь                                                                    │         33 │             3 │
+│ ClickHouse в Курсометре                                                                              │          4 │             2 │
+│ 10 Good Reasons to Use ClickHouse                                                                    │         27 │             2 │
+...
+
+84 rows in set. Elapsed: 0.013 sec. Processed 155.65 thousand rows, 16.94 MB (11.96 million rows/s., 1.30 GB/s.)
+```
+
+7. Here is a search for videos with **ClickHouse** in the `title` or `description` fields:
+
+```sql
+SELECT
+    view_count,
+    like_count,
+    dislike_count,
+    concat('https://youtu.be/', id) AS url,
+    title
+FROM youtube
+WHERE (title ILIKE '%ClickHouse%') OR (description ILIKE '%ClickHouse%')
+ORDER BY
+    like_count DESC,
+    view_count DESC
+```
+
+This query has to process every row, and also parse through two columns of strings. Even then, we get decent performance at 4.15M rows/second:
+
+```response
+1174 rows in set. Elapsed: 1099.368 sec. Processed 4.56 billion rows, 1.98 TB (4.15 million rows/s., 1.80 GB/s.)
+```
+
+The results look like:
+
+```response
+┌─view_count─┬─like_count─┬─dislike_count─┬─url──────────────────────────┬─title──────────────────────────────────────────────────────────────────────────────────────────────────┐
+│       1919 │         63 │             1 │ https://youtu.be/b9MeoOtAivQ │ ClickHouse v21.10 Release Webinar                                                                      │
+│       8710 │         62 │             4 │ https://youtu.be/PeV1mC2z--M │ What is JDBC DriverManager? | JDBC                                                                     │
+│       3534 │         62 │             1 │ https://youtu.be/8nWRhK9gw10 │ CLICKHOUSE - Arquitetura Modular                                                                       │
+```
--- a/docs/en/getting-started/install.md
+++ b/docs/en/getting-started/install.md
@ -14,75 +14,35 @@ import CodeBlock from '@theme/CodeBlock';
 You have three options for getting up and running with ClickHouse:

 - **[ClickHouse Cloud](https://clickhouse.com/cloud/):** The official ClickHouse as a service, - built by, maintained and supported by the creators of ClickHouse
- **[Self-managed ClickHouse](#self-managed-install):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture
- **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** Read the guide with the official image in Docker Hub
+- **[Quick Install](#quick-install):** an easy-to-download binary for testing and developing with ClickHouse
+- **[Production Deployments](#available-installation-options):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture
+- **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** use the official Docker image in Docker Hub

 ## ClickHouse Cloud

 The quickest and easiest way to get up and running with ClickHouse is to create a new service in [ClickHouse Cloud](https://clickhouse.cloud/).

-## Self-Managed Install
+## Quick Install

 :::tip
 For production installs of a specific release version see the [installation options](#available-installation-options) down below.
 :::

-<Tabs>
-<TabItem value="linux" label="Linux" default>
+On Linux and macOS:

-1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable:
+1. If you are just getting started and want to see what ClickHouse can do, the simplest way to download ClickHouse locally is to run the following command. It downloads a single binary for your operating system that can be used to run the ClickHouse server, clickhouse-client, clickhouse-local,
+ClickHouse Keeper, and other tools:

  ```bash
  curl https://clickhouse.com/ | sh
  ```

-1. Run the `install` command, which defines a collection of useful symlinks along with the files and folders used by ClickHouse - all of which you can see in the output of the install script:
-
-  ```bash
-  sudo ./clickhouse install
-  ```
-
-1. At the end of the install script, you are prompted for a password for the `default` user. Feel free to enter a password, or you can optionally leave it blank:
-
-  ```response
-  Creating log directory /var/log/clickhouse-server.
-  Creating data directory /var/lib/clickhouse.
-  Creating pid directory /var/run/clickhouse-server.
-   chown -R clickhouse:clickhouse '/var/log/clickhouse-server'
-   chown -R clickhouse:clickhouse '/var/run/clickhouse-server'
-   chown  clickhouse:clickhouse '/var/lib/clickhouse'
-  Enter password for default user:
-  ```
-  You should see the following output:
-
-  ```response
-   ClickHouse has been successfully installed.
-
-   Start clickhouse-server with:
-    sudo clickhouse start
-
-   Start clickhouse-client with:
-    clickhouse-client
-  ```
-
 1. Run the following command to start the ClickHouse server:
    ```bash
-    sudo clickhouse start
+    ./clickhouse server
    ```

-</TabItem>
-<TabItem value="macos" label="macOS">
-
-1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable:
-  ```bash
-  curl https://clickhouse.com/ | sh
-  ```
-
-1. Run the ClickHouse server:
-
-  ```bash
-  ./clickhouse server
-  ```
+    The first time you run this script, the necessary files and folders are created in the current directory, then the server starts.

 1. Open a new terminal and use the **clickhouse-client** to connect to your service:

@ -101,15 +61,14 @@ For production installs of a specific release version see the [installation opti

  You are ready to start sending DDL and SQL commands to ClickHouse!

-</TabItem>
-</Tabs>
-

 :::tip
-The [Quick Start](/docs/en/quick-start.mdx/#step-1-get-clickhouse) walks through the steps to download and run ClickHouse, connect to it, and insert data.
+The [Quick Start](/docs/en/quick-start.mdx) walks through the steps for creating tables and inserting data.
 :::

-## Available Installation Options {#available-installation-options}
+## Production Deployments {#available-installation-options}
+
+For production deployments of ClickHouse, choose from one of the following install options.

 ### From DEB Packages {#install-from-deb-packages}

@ -118,9 +77,12 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun
 #### Setup the Debian repository
 ``` bash
 sudo apt-get install -y apt-transport-https ca-certificates dirmngr
-sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
+GNUPGHOME=$(mktemp -d)
+sudo GNUPGHOME="$GNUPGHOME" gpg --no-default-keyring --keyring /usr/share/keyrings/clickhouse-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754
+sudo rm -r "$GNUPGHOME"
+sudo chmod +r /usr/share/keyrings/clickhouse-keyring.gpg

-echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \
+echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \
    /etc/apt/sources.list.d/clickhouse.list
 sudo apt-get update
 ```
@ -174,7 +136,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.

 </details>

-You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs.
+You can replace `stable` with `lts` to use different [release kinds](/knowledgebase/production) based on your needs.

 You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/main/c/).

@ -272,7 +234,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.

 </details>

-You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs.
+You can replace `stable` with `lts` to use different [release kinds](/knowledgebase/production) based on your needs.

 Then run these commands to install packages:

--- a/docs/en/getting-started/playground.md
+++ b/docs/en/getting-started/playground.md
@ -1,5 +1,5 @@
 ---
-sidebar_label: Playground
+sidebar_label: ClickHouse Playground
 sidebar_position: 2
 keywords: [clickhouse, playground, getting, started, docs]
 description: The ClickHouse Playground allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
@ -11,7 +11,7 @@ slug: /en/getting-started/playground
 [ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
 Several example datasets are available in Playground.

-You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces).
+You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../integrations/index.mdx).

 ## Credentials {#credentials}

--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@ -1,7 +1,7 @@
 ---
 slug: /en/interfaces/formats
 sidebar_position: 21
-sidebar_label: Input and Output Formats
+sidebar_label: View all formats...
 title: Formats for Input and Output Data
 ---

@ -154,7 +154,7 @@ Arrays are written as a list of comma-separated values in square brackets. Numbe
 In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id.
 If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) to optimize ENUM parsing.

-Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) structures is represented as an array.
+Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) structures is represented as an array.

 For example:

@ -684,7 +684,7 @@ Example:
 ## JSONColumns {#jsoncolumns}

 :::tip
-The output of the JSONColumns* formats provides the ClickHouse field name and then the content of each row of the table for that field; 
+The output of the JSONColumns* formats provides the ClickHouse field name and then the content of each row of the table for that field;
 visually, the data is rotated 90 degrees to the left.
 :::

@ -1150,7 +1150,7 @@ Any set of bytes can be output in the strings. Use the `JSONEachRow` format if y

 ### Usage of Nested Structures {#jsoneachrow-nested}

-If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting.
+If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting.

 For example, consider the following table:

@ -1776,7 +1776,7 @@ message MessageType {
 ```

 ClickHouse tries to find a column named `x.y.z` (or `x_y_z` or `X.y_Z` and so on).
-Nested messages are suitable to input or output a [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/nested.md).
+Nested messages are suitable for input or output of [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/index.md).

 Default values defined in a protobuf schema like this

@ -1808,23 +1808,26 @@ ClickHouse Avro format supports reading and writing [Avro data files](https://av

 The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.

-| Avro data type `INSERT`                     | ClickHouse data type                                                                                            | Avro data type `SELECT`                         |
-|---------------------------------------------|-----------------------------------------------------------------------------------------------------------------|-------------------------------------------------|
-| `boolean`, `int`, `long`, `float`, `double` | [Int(8\                                                                                                         | 16\                                             |32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int`                        |
-| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long`                                          |
-| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md)                                                           | `float`                                         |
-| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md)                                                           | `double`                                        |
-| `bytes`, `string`, `fixed`, `enum`          | [String](/docs/en/sql-reference/data-types/string.md)                                                           | `bytes` or `string` \*                          |
-| `bytes`, `string`, `fixed`                  | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md)                                              | `fixed(N)`                                      |
-| `enum`                                      | [Enum(8\                                                                                                        | 16)](/docs/en/sql-reference/data-types/enum.md) | `enum`                       |
-| `array(T)`                                  | [Array(T)](/docs/en/sql-reference/data-types/array.md)                                                          | `array(T)`                                      |
-| `union(null, T)`, `union(T, null)`          | [Nullable(T)](/docs/en/sql-reference/data-types/date.md)                                                        | `union(null, T)`                                |
-| `null`                                      | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md)                            | `null`                                          |
-| `int (date)` \**                            | [Date](/docs/en/sql-reference/data-types/date.md)                                                               | `int (date)` \**                                |
-| `long (timestamp-millis)` \**               | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md)                                                  | `long (timestamp-millis)` \*                    |
-| `long (timestamp-micros)` \**               | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md)                                                  | `long (timestamp-micros)` \*                    |
-| `int`                                       | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md)                                                       | `int`                                           |
-| `fixed(16)`                                 | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md)                                                       | `fixed(16)`                                     |
+| Avro data type `INSERT`                     | ClickHouse data type                                                                                                          | Avro data type `SELECT`       |
+|---------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------|-------------------------------|
+| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int`                         |
+| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)               | `long`                        |
+| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md)                                                                         | `float`                       |
+| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md)                                                                         | `double`                      |
+| `bytes`, `string`, `fixed`, `enum`          | [String](/docs/en/sql-reference/data-types/string.md)                                                                         | `bytes` or `string` \*        |
+| `bytes`, `string`, `fixed`                  | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md)                                                            | `fixed(N)`                    |
+| `enum`                                      | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md)                                                                     | `enum`                        |
+| `array(T)`                                  | [Array(T)](/docs/en/sql-reference/data-types/array.md)                                                                        | `array(T)`                    |
+| `union(null, T)`, `union(T, null)`          | [Nullable(T)](/docs/en/sql-reference/data-types/nullable.md)                                                                  | `union(null, T)`              |
+| `null`                                      | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md)                                          | `null`                        |
+| `int (date)` \**                            | [Date](/docs/en/sql-reference/data-types/date.md), [Date32](/docs/en/sql-reference/data-types/date32.md)                      | `int (date)` \**              |
+| `long (timestamp-millis)` \**               | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md)                                                                | `long (timestamp-millis)` \** |
+| `long (timestamp-micros)` \**               | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md)                                                                | `long (timestamp-micros)` \** |
+| `int`                                       | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md)                                                                     | `int`                         |
+| `fixed(16)`                                 | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md)                                                                     | `fixed(16)`                   |
+| `bytes (decimal)` \**                       | [Decimal(P, S)](/docs/en/sql-reference/data-types/decimal.md)                                                                | `bytes (decimal)` \**         |
+| `string (uuid)` \**                         | [UUID](/docs/en/sql-reference/data-types/uuid.md)                                                                            | `string (uuid)` \**           |
+

 \* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings-formats.md/#output_format_avro_string_column_pattern)
 \** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)
@ -1975,7 +1978,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t

 - [output_format_parquet_row_group_size](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`.
 - [output_format_parquet_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`.
- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`.
+- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) table in Parquet input format. Default value - `false`.
 - [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
 - [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
 - [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
--- a/docs/en/interfaces/overview.md
+++ b/docs/en/interfaces/overview.md
@ -6,7 +6,7 @@ keywords: [clickhouse, network, interfaces, http, tcp, grpc, command-line, clien
 description: ClickHouse provides three network interfaces
 ---

-# Interfaces
+# Drivers and Interfaces

 ClickHouse provides three network interfaces (they can be optionally wrapped in TLS for additional security):

--- a/docs/en/interfaces/postgresql.md
+++ b/docs/en/interfaces/postgresql.md
@ -8,7 +8,7 @@ sidebar_label: PostgreSQL Interface

 ClickHouse supports the PostgreSQL wire protocol, which allows you to use Postgres clients to connect to ClickHouse. In a sense, ClickHouse can pretend to be a PostgreSQL instance - allowing you to connect a PostgreSQL client application to ClickHouse that is not already directly supported by ClickHouse (for example, Amazon Redshift).

-To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings#server_configuration_parameters-postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
+To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:

 ```xml
 <clickhouse>
--- a/docs/en/interfaces/schema-inference.md
+++ b/docs/en/interfaces/schema-inference.md
@ -1473,6 +1473,7 @@ In Avro format ClickHouse reads its schema from the data and converts it to Clic
 |------------------------------------|--------------------------------------------------------------------------------|
 | `boolean`                          | [Bool](../sql-reference/data-types/boolean.md)                                 |
 | `int`                              | [Int32](../sql-reference/data-types/int-uint.md)                               |
+| `int (date)` \*                    | [Date32](../sql-reference/data-types/date32.md)                                |
 | `long`                             | [Int64](../sql-reference/data-types/int-uint.md)                               |
 | `float`                            | [Float32](../sql-reference/data-types/float.md)                                |
 | `double`                           | [Float64](../sql-reference/data-types/float.md)                                |
@ -1482,6 +1483,10 @@ In Avro format ClickHouse reads its schema from the data and converts it to Clic
 | `array(T)`                         | [Array(T)](../sql-reference/data-types/array.md)                               |
 | `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql-reference/data-types/date.md)                             |
 | `null`                             | [Nullable(Nothing)](../sql-reference/data-types/special-data-types/nothing.md) |
+| `string (uuid)` \*                 | [UUID](../sql-reference/data-types/uuid.md)                                    |
+| `bytes (decimal)` \*               | [Decimal(P, S)](../sql-reference/data-types/decimal.md)                        |
+
+\* [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)

 Other Avro types are not supported.

--- a/docs/en/operations/_category_.yml
+++ b/docs/en/operations/_category_.yml
@ -2,7 +2,3 @@ position: 70
 label: 'Operations'
 collapsible: true
 collapsed: true
-link:
-  type: generated-index
-  title: Operations
-  slug: /en/operations
--- a/docs/en/operations/access-rights.md
+++ b/docs/en/operations/access-rights.md
@ -1,152 +0,0 @@
---
-slug: /en/operations/access-rights
-sidebar_position: 48
-sidebar_label: Access Control and Account Management
-title: Access Control and Account Management
---
-
-ClickHouse supports access control management based on [RBAC](https://en.wikipedia.org/wiki/Role-based_access_control) approach.
-
-ClickHouse access entities:
- [User account](#user-account-management)
- [Role](#role-management)
- [Row Policy](#row-policy-management)
- [Settings Profile](#settings-profiles-management)
- [Quota](#quotas-management)
-
-You can configure access entities using:
-
-   SQL-driven workflow.
-
-    You need to [enable](#enabling-access-control) this functionality.
-
-   Server [configuration files](../operations/configuration-files.md) `users.xml` and `config.xml`.
-
-We recommend using SQL-driven workflow. Both of the configuration methods work simultaneously, so if you use the server configuration files for managing accounts and access rights, you can smoothly switch to SQL-driven workflow.
-
-:::warning
-You can’t manage the same access entity by both configuration methods simultaneously.
-:::
-
-To see all users, roles, profiles, etc. and all their grants use [SHOW ACCESS](../sql-reference/statements/show.md#show-access-statement) statement.
-
-## Usage {#access-control-usage}
-
-By default, the ClickHouse server provides the `default` user account which is not allowed using SQL-driven access control and account management but has all the rights and permissions. The `default` user account is used in any cases when the username is not defined, for example, at login from client or in distributed queries. In distributed query processing a default user account is used, if the configuration of the server or cluster does not specify the [user and password](../engines/table-engines/special/distributed.md) properties.
-
-If you just started using ClickHouse, consider the following scenario:
-
-1.  [Enable](#enabling-access-control) SQL-driven access control and account management for the `default` user.
-2.  Log in to the `default` user account and create all the required users. Don’t forget to create an administrator account (`GRANT ALL ON *.* TO admin_user_account WITH GRANT OPTION`).
-3.  [Restrict permissions](../operations/settings/permissions-for-queries.md#permissions_for_queries) for the `default` user and disable SQL-driven access control and account management for it.
-
-### Properties of Current Solution {#access-control-properties}
-
-   You can grant permissions for databases and tables even if they do not exist.
-   If a table was deleted, all the privileges that correspond to this table are not revoked. This means that even if you create a new table with the same name later, all the privileges remain valid. To revoke privileges corresponding to the deleted table, you need to execute, for example, the `REVOKE ALL PRIVILEGES ON db.table FROM ALL` query.
-   There are no lifetime settings for privileges.
-
-## User Account {#user-account-management}
-
-A user account is an access entity that allows to authorize someone in ClickHouse. A user account contains:
-
-   Identification information.
-   [Privileges](../sql-reference/statements/grant.md#grant-privileges) that define a scope of queries the user can execute.
-   Hosts allowed to connect to the ClickHouse server.
-   Assigned and default roles.
-   Settings with their constraints applied by default at user login.
-   Assigned settings profiles.
-
-Privileges can be granted to a user account by the [GRANT](../sql-reference/statements/grant.md) query or by assigning [roles](#role-management). To revoke privileges from a user, ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query. To list privileges for a user, use the [SHOW GRANTS](../sql-reference/statements/show.md#show-grants-statement) statement.
-
-Management queries:
-
-   [CREATE USER](../sql-reference/statements/create/user.md)
-   [ALTER USER](../sql-reference/statements/alter/user.md#alter-user-statement)
-   [DROP USER](../sql-reference/statements/drop.md)
-   [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement)
-   [SHOW USERS](../sql-reference/statements/show.md#show-users-statement)
-
-### Settings Applying {#access-control-settings-applying}
-
-Settings can be configured differently: for a user account, in its granted roles and in settings profiles. At user login, if a setting is configured for different access entities, the value and constraints of this setting are applied as follows (from higher to lower priority):
-
-1.  User account settings.
-2.  The settings of default roles of the user account. If a setting is configured in some roles, then order of the setting application is undefined.
-3.  The settings from settings profiles assigned to a user or to its default roles. If a setting is configured in some profiles, then order of setting application is undefined.
-4.  Settings applied to all the server by default or from the [default profile](../operations/server-configuration-parameters/settings.md#default-profile).
-
-## Role {#role-management}
-
-Role is a container for access entities that can be granted to a user account.
-
-Role contains:
-
-   [Privileges](../sql-reference/statements/grant.md#grant-privileges)
-   Settings and constraints
-   List of assigned roles
-
-Management queries:
-
-   [CREATE ROLE](../sql-reference/statements/create/role.md)
-   [ALTER ROLE](../sql-reference/statements/alter/role.md#alter-role-statement)
-   [DROP ROLE](../sql-reference/statements/drop.md)
-   [SET ROLE](../sql-reference/statements/set-role.md)
-   [SET DEFAULT ROLE](../sql-reference/statements/set-role.md#set-default-role-statement)
-   [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement)
-   [SHOW ROLES](../sql-reference/statements/show.md#show-roles-statement)
-
-Privileges can be granted to a role by the [GRANT](../sql-reference/statements/grant.md) query. To revoke privileges from a role ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query.
-
-## Row Policy {#row-policy-management}
-
-Row policy is a filter that defines which of the rows are available to a user or a role. Row policy contains filters for one particular table, as well as a list of roles and/or users which should use this row policy.
-
-:::warning
-Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies.
-:::
-
-Management queries:
-
-   [CREATE ROW POLICY](../sql-reference/statements/create/row-policy.md)
-   [ALTER ROW POLICY](../sql-reference/statements/alter/row-policy.md#alter-row-policy-statement)
-   [DROP ROW POLICY](../sql-reference/statements/drop.md#drop-row-policy-statement)
-   [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement)
-   [SHOW POLICIES](../sql-reference/statements/show.md#show-policies-statement)
-
-## Settings Profile {#settings-profiles-management}
-
-Settings profile is a collection of [settings](../operations/settings/index.md). Settings profile contains settings and constraints, as well as a list of roles and/or users to which this profile is applied.
-
-Management queries:
-
-   [CREATE SETTINGS PROFILE](../sql-reference/statements/create/settings-profile.md#create-settings-profile-statement)
-   [ALTER SETTINGS PROFILE](../sql-reference/statements/alter/settings-profile.md#alter-settings-profile-statement)
-   [DROP SETTINGS PROFILE](../sql-reference/statements/drop.md#drop-settings-profile-statement)
-   [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement)
-   [SHOW PROFILES](../sql-reference/statements/show.md#show-profiles-statement)
-
-## Quota {#quotas-management}
-
-Quota limits resource usage. See [Quotas](../operations/quotas.md).
-
-Quota contains a set of limits for some durations, as well as a list of roles and/or users which should use this quota.
-
-Management queries:
-
-   [CREATE QUOTA](../sql-reference/statements/create/quota.md)
-   [ALTER QUOTA](../sql-reference/statements/alter/quota.md#alter-quota-statement)
-   [DROP QUOTA](../sql-reference/statements/drop.md#drop-quota-statement)
-   [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement)
-   [SHOW QUOTA](../sql-reference/statements/show.md#show-quota-statement)
-   [SHOW QUOTAS](../sql-reference/statements/show.md#show-quotas-statement)
-
-## Enabling SQL-driven Access Control and Account Management {#enabling-access-control}
-
-   Setup a directory for configurations storage.
-
-    ClickHouse stores access entity configurations in the folder set in the [access_control_path](../operations/server-configuration-parameters/settings.md#access_control_path) server configuration parameter.
-
-   Enable SQL-driven access control and account management for at least one user account.
-
-    By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access_management](../operations/settings/settings-users.md#access_management-user-setting) setting to 1.
--- a/docs/en/operations/backup.md
+++ b/docs/en/operations/backup.md
@ -1,5 +1,6 @@
 ---
 slug: /en/operations/backup
+description: In order to effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data.
 ---

 # Backup and Restore
@ -213,7 +214,7 @@ To write backups to an S3 bucket you need three pieces of information:
  for example `Abc+123`

 :::note
-Creating an S3 bucket is covered in [Use S3 Object Storage as a ClickHouse disk](/docs/en/integrations/data-ingestion/s3/configuring-s3-for-clickhouse-use.md), just come back to this doc after saving the policy, there is no need to configure ClickHouse to use the S3 bucket.
+Creating an S3 bucket is covered in [Use S3 Object Storage as a ClickHouse disk](/docs/en/integrations/data-ingestion/s3/index.md#configuring-s3-for-clickhouse-use), just come back to this doc after saving the policy, there is no need to configure ClickHouse to use the S3 bucket.
 :::

 The destination for a backup will be specified like this:
@ -330,7 +331,7 @@ It is also possible to `BACKUP`/`RESTORE` to S3 by configuring an S3 disk in the
            <s3>
                <volumes>
                    <main>
-                        <disk>s3</disk>
+                        <disk>s3_plain</disk>
                    </main>
                </volumes>
            </s3>
--- a/docs/en/operations/caches.md
+++ b/docs/en/operations/caches.md
@ -3,6 +3,7 @@ slug: /en/operations/caches
 sidebar_position: 65
 sidebar_label: Caches
 title: "Cache Types"
+description: When performing queries, ClickHouse uses different caches.
 ---

 When performing queries, ClickHouse uses different caches.
--- a/docs/en/operations/clickhouse-keeper.md
+++ b/docs/en/operations/clickhouse-keeper.md
@ -1,378 +0,0 @@
---
-slug: /en/operations/clickhouse-keeper
-sidebar_position: 66
-sidebar_label: ClickHouse Keeper
---
-
-# ClickHouse Keeper
-import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
-
-<SelfManaged />
-
-ClickHouse Keeper provides the coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is compatible with ZooKeeper.
-
-## Implementation details {#implementation-details}
-
-ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java, and has quite a simple and powerful data model. ZooKeeper's coordination algorithm, ZooKeeper Atomic Broadcast (ZAB), doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper ClickHouse Keeper is written in C++ and uses the [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm allows linearizability for reads and writes, and has several open-source implementations in different languages.
-
-By default, ClickHouse Keeper provides the same guarantees as ZooKeeper (linearizable writes, non-linearizable reads). It has a compatible client-server protocol, so any standard ZooKeeper client can be used to interact with ClickHouse Keeper. Snapshots and logs have an incompatible format with ZooKeeper, but the `clickhouse-keeper-converter` tool enables the conversion of ZooKeeper data to ClickHouse Keeper snapshots. The interserver protocol in ClickHouse Keeper is also incompatible with ZooKeeper so a mixed ZooKeeper / ClickHouse Keeper cluster is impossible.
-
-ClickHouse Keeper supports Access Control Lists (ACLs) the same way as [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) does. ClickHouse Keeper supports the same set of permissions and has the identical built-in schemes: `world`, `auth` and `digest`. The digest authentication scheme uses the pair `username:password`, the password is encoded in Base64.
-
-:::note
-External integrations are not supported.
-:::
-
-## Configuration {#configuration}
-
-ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server. In both cases the configuration is almost the same `.xml` file. The main ClickHouse Keeper configuration tag is `<keeper_server>`. Keeper configuration has the following parameters:
-
-    `tcp_port` — Port for a client to connect (default for ZooKeeper is `2181`).
-    `tcp_port_secure` — Secure port for an SSL connection between client and keeper-server.
-    `server_id` — Unique server id, each participant of the ClickHouse Keeper cluster must have a unique number (1, 2, 3, and so on).
-    `log_storage_path` — Path to coordination logs, just like ZooKeeper it is best to store logs on non-busy nodes.
-    `snapshot_storage_path` — Path to coordination snapshots.
-
-Other common parameters are inherited from the ClickHouse server config (`listen_host`, `logger`, and so on).
-
-Internal coordination settings are located in the `<keeper_server>.<coordination_settings>` section:
-
-    `operation_timeout_ms` — Timeout for a single client operation (ms) (default: 10000).
-    `min_session_timeout_ms` — Min timeout for client session (ms) (default: 10000).
-    `session_timeout_ms` — Max timeout for client session (ms) (default: 100000).
-    `dead_session_check_period_ms` — How often ClickHouse Keeper checks for dead sessions and removes them (ms) (default: 500).
-    `heart_beat_interval_ms` — How often a ClickHouse Keeper leader will send heartbeats to followers (ms) (default: 500).
-    `election_timeout_lower_bound_ms` — If the follower does not receive a heartbeat from the leader in this interval, then it can initiate leader election (default: 1000). Must be less than or equal to `election_timeout_upper_bound_ms`. Ideally they shouldn't be equal.
-    `election_timeout_upper_bound_ms` — If the follower does not receive a heartbeat from the leader in this interval, then it must initiate leader election (default: 2000).
-    `rotate_log_storage_interval` — How many log records to store in a single file (default: 100000).
-    `reserved_log_items` — How many coordination log records to store before compaction (default: 100000).
-    `snapshot_distance` — How often ClickHouse Keeper will create new snapshots (in the number of records in logs) (default: 100000).
-    `snapshots_to_keep` — How many snapshots to keep (default: 3).
-    `stale_log_gap` — Threshold when leader considers follower as stale and sends the snapshot to it instead of logs (default: 10000).
-    `fresh_log_gap` — When node became fresh (default: 200).
-    `max_requests_batch_size` - Max size of batch in requests count before it will be sent to RAFT (default: 100).
-    `force_sync` — Call `fsync` on each write to coordination log (default: true).
-    `quorum_reads` — Execute read requests as writes through whole RAFT consensus with similar speed (default: false).
-    `raft_logs_level` — Text logging level about coordination (trace, debug, and so on) (default: system default).
-    `auto_forwarding` — Allow to forward write requests from followers to the leader (default: true).
-    `shutdown_timeout` — Wait to finish internal connections and shutdown (ms) (default: 5000).
-    `startup_timeout` — If the server doesn't connect to other quorum participants in the specified timeout it will terminate (ms) (default: 30000).
-    `four_letter_word_white_list` — White list of 4lw commands (default: `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld`).
-
-Quorum configuration is located in the `<keeper_server>.<raft_configuration>` section and contain servers description.
-
-The only parameter for the whole quorum is `secure`, which enables encrypted connection for communication between quorum participants. The parameter can be set `true` if SSL connection is required for internal communication between nodes, or left unspecified otherwise.
-
-The main parameters for each `<server>` are:
-
-    `id` — Server identifier in a quorum.
-    `hostname` — Hostname where this server is placed.
-    `port` — Port where this server listens for connections.
-
-:::note
-In the case of a change in the topology of your ClickHouse Keeper cluster (e.g., replacing a server), please make sure to keep the mapping of `server_id` to `hostname` consistent and avoid shuffling or reusing an existing `server_id` for different servers (e.g., it can happen if your rely on automation scripts to deploy ClickHouse Keeper)
-:::
-
-Examples of configuration for quorum with three nodes can be found in [integration tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/integration) with `test_keeper_` prefix. Example configuration for server #1:
-
-```xml
-<keeper_server>
-    <tcp_port>2181</tcp_port>
-    <server_id>1</server_id>
-    <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
-    <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
-
-    <coordination_settings>
-        <operation_timeout_ms>10000</operation_timeout_ms>
-        <session_timeout_ms>30000</session_timeout_ms>
-        <raft_logs_level>trace</raft_logs_level>
-    </coordination_settings>
-
-    <raft_configuration>
-        <server>
-            <id>1</id>
-            <hostname>zoo1</hostname>
-            <port>9444</port>
-        </server>
-        <server>
-            <id>2</id>
-            <hostname>zoo2</hostname>
-            <port>9444</port>
-        </server>
-        <server>
-            <id>3</id>
-            <hostname>zoo3</hostname>
-            <port>9444</port>
-        </server>
-    </raft_configuration>
-</keeper_server>
-```
-
-## How to run {#how-to-run}
-
-ClickHouse Keeper is bundled into the ClickHouse server package, just add configuration of `<keeper_server>` and start ClickHouse server as always. If you want to run standalone ClickHouse Keeper you can start it in a similar way with:
-
-```bash
-clickhouse-keeper --config /etc/your_path_to_config/config.xml
-```
-
-If you don't have the symlink (`clickhouse-keeper`) you can create it or specify `keeper` as an argument to `clickhouse`:
-
-```bash
-clickhouse keeper --config /etc/your_path_to_config/config.xml
-```
-
-## Four Letter Word Commands {#four-letter-word-commands}
-
-ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively.
-
-The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld`.
-
-You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port.
-
-```
-echo mntr | nc localhost 9181
-```
-
-Bellow is the detailed 4lw commands:
-
- `ruok`: Tests if server is running in a non-error state. The server will respond with `imok` if it is running. Otherwise it will not respond at all. A response of `imok` does not necessarily indicate that the server has joined the quorum, just that the server process is active and bound to the specified client port. Use "stat" for details on state wrt quorum and client connection information.
-
-```
-imok
-```
-
- `mntr`: Outputs a list of variables that could be used for monitoring the health of the cluster.
-
-```
-zk_version      v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
-zk_avg_latency  0
-zk_max_latency  0
-zk_min_latency  0
-zk_packets_received     68
-zk_packets_sent 68
-zk_num_alive_connections        1
-zk_outstanding_requests 0
-zk_server_state leader
-zk_znode_count  4
-zk_watch_count  1
-zk_ephemerals_count     0
-zk_approximate_data_size        723
-zk_open_file_descriptor_count   310
-zk_max_file_descriptor_count    10240
-zk_followers    0
-zk_synced_followers     0
-```
-
- `srvr`: Lists full details for the server.
-
-```
-ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
-Latency min/avg/max: 0/0/0
-Received: 2
-Sent : 2
-Connections: 1
-Outstanding: 0
-Zxid: 34
-Mode: leader
-Node count: 4
-```
-
- `stat`: Lists brief details for the server and connected clients.
-
-```
-ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
-Clients:
- 192.168.1.1:52852(recved=0,sent=0)
- 192.168.1.1:52042(recved=24,sent=48)
-Latency min/avg/max: 0/0/0
-Received: 4
-Sent : 4
-Connections: 1
-Outstanding: 0
-Zxid: 36
-Mode: leader
-Node count: 4
-```
-
- `srst`: Reset server statistics. The command will affect the result of `srvr`, `mntr` and `stat`.
-
-```
-Server stats reset.
-```
-
- `conf`: Print details about serving configuration.
-
-```
-server_id=1
-tcp_port=2181
-four_letter_word_white_list=*
-log_storage_path=./coordination/logs
-snapshot_storage_path=./coordination/snapshots
-max_requests_batch_size=100
-session_timeout_ms=30000
-operation_timeout_ms=10000
-dead_session_check_period_ms=500
-heart_beat_interval_ms=500
-election_timeout_lower_bound_ms=1000
-election_timeout_upper_bound_ms=2000
-reserved_log_items=1000000000000000
-snapshot_distance=10000
-auto_forwarding=true
-shutdown_timeout=5000
-startup_timeout=240000
-raft_logs_level=information
-snapshots_to_keep=3
-rotate_log_storage_interval=100000
-stale_log_gap=10000
-fresh_log_gap=200
-max_requests_batch_size=100
-quorum_reads=false
-force_sync=false
-compress_logs=true
-compress_snapshots_with_zstd_format=true
-configuration_change_tries_count=20
-```
-
- `cons`: List full connection/session details for all clients connected to this server. Includes information on numbers of packets received/sent, session id, operation latencies, last operation performed, etc...
-
-```
- 192.168.1.1:52163(recved=0,sent=0,sid=0xffffffffffffffff,lop=NA,est=1636454787393,to=30000,lzxid=0xffffffffffffffff,lresp=0,llat=0,minlat=0,avglat=0,maxlat=0)
- 192.168.1.1:52042(recved=9,sent=18,sid=0x0000000000000001,lop=List,est=1636454739887,to=30000,lcxid=0x0000000000000005,lzxid=0x0000000000000005,lresp=1636454739892,llat=0,minlat=0,avglat=0,maxlat=0)
-```
-
- `crst`: Reset connection/session statistics for all connections.
-
-```
-Connection stats reset.
-```
-
- `envi`: Print details about serving environment
-
-```
-Environment:
-clickhouse.keeper.version=v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
-host.name=ZBMAC-C02D4054M.local
-os.name=Darwin
-os.arch=x86_64
-os.version=19.6.0
-cpu.count=12
-user.name=root
-user.home=/Users/JackyWoo/
-user.dir=/Users/JackyWoo/project/jd/clickhouse/cmake-build-debug/programs/
-user.tmp=/var/folders/b4/smbq5mfj7578f2jzwn602tt40000gn/T/
-```
-
-
- `dirs`: Shows the total size of snapshot and log files in bytes
-
-```
-snapshot_dir_size: 0
-log_dir_size: 3875
-```
-
- `isro`: Tests if server is running in read-only mode. The server will respond with "ro" if in read-only mode or "rw" if not in read-only mode.
-
-```
-rw
-```
-
- `wchs`: Lists brief information on watches for the server.
-
-```
-1 connections watching 1 paths
-Total watches:1
-```
-
- `wchc`: Lists detailed information on watches for the server, by session. This outputs a list of sessions (connections) with associated watches (paths). Note, depending on the number of watches this operation may be expensive (ie impact server performance), use it carefully.
-
-```
-0x0000000000000001
-    /clickhouse/task_queue/ddl
-```
-
- `wchp`: Lists detailed information on watches for the server, by path. This outputs a list of paths (znodes) with associated sessions. Note, depending on the number of watches this operation may be expensive (i. e. impact server performance), use it carefully.
-
-```
-/clickhouse/task_queue/ddl
-    0x0000000000000001
-```
-
- `dump`: Lists the outstanding sessions and ephemeral nodes. This only works on the leader.
-
-```
-Sessions dump (2):
-0x0000000000000001
-0x0000000000000002
-Sessions with Ephemerals (1):
-0x0000000000000001
- /clickhouse/task_queue/ddl
-```
-
- `csnp`: Schedule a snapshot creation task. Return the last committed log index of the scheduled snapshot if success or `Failed to schedule snapshot creation task.` if failed. Note that `lgif` command can help you determine whether the snapshot is done.
-
-```
-100
-```
-
- `lgif`: Keeper log information. `first_log_idx` : my first log index in log store; `first_log_term` : my first log term; `last_log_idx` : my last log index in log store; `last_log_term` : my last log term; `last_committed_log_idx` : my last committed log index in state machine; `leader_committed_log_idx` : leader's committed log index from my perspective; `target_committed_log_idx` : target log index should be committed to; `last_snapshot_idx` : the largest committed log index in last snapshot.
-
-```
-first_log_idx   1
-first_log_term  1
-last_log_idx    101
-last_log_term   1
-last_committed_log_idx  100
-leader_committed_log_idx    101
-target_committed_log_idx    101
-last_snapshot_idx   50
-```
-
- `rqld`: Request to become new leader. Return `Sent leadership request to leader.` if request sent or `Failed to send leadership request to leader.` if request not sent. Note that if node is already leader the outcome is same as the request is sent. 
-
-```
-Sent leadership request to leader.
-```
-
-## Migration from ZooKeeper {#migration-from-zookeeper}
-
-Seamlessly migration from ZooKeeper to ClickHouse Keeper is impossible you have to stop your ZooKeeper cluster, convert data and start ClickHouse Keeper. `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration:
-
-1. Stop all ZooKeeper nodes.
-
-2. Optional, but recommended: find ZooKeeper leader node, start and stop it again. It will force ZooKeeper to create a consistent snapshot.
-
-3. Run `clickhouse-keeper-converter` on a leader, for example:
-
-```bash
-clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 --zookeeper-snapshots-dir /var/lib/zookeeper/version-2 --output-dir /path/to/clickhouse/keeper/snapshots
-```
-
-4. Copy snapshot to ClickHouse server nodes with a configured `keeper` or start ClickHouse Keeper instead of ZooKeeper. The snapshot must persist on all nodes, otherwise, empty nodes can be faster and one of them can become a leader.
-
-
-
-## Recovering after losing quorum
-
-Because ClickHouse Keeper uses Raft it can tolerate certain amount of node crashes depending on the cluster size. \
-E.g. for a 3-node cluster, it will continue working correctly if only 1 node crashes.
-
-Cluster configuration can be dynamically configured but there are some limitations. Reconfiguration relies on Raft also
-so to add/remove a node from the cluster you need to have a quorum. If you lose too many nodes in your cluster at the same time without any chance
-of starting them again, Raft will stop working and not allow you to reconfigure your cluster using the conventional way.
-
-Nevertheless, ClickHouse Keeper has a recovery mode which allows you to forcefully reconfigure your cluster with only 1 node.
-This should be done only as your last resort if you cannot start your nodes again, or start a new instance on the same endpoint.
-
-Important things to note before continuing:
- Make sure that the failed nodes cannot connect to the cluster again.
- Do not start any of the new nodes until it's specified in the steps.
-
-After making sure that the above things are true, you need to do following:
-1. Pick a single Keeper node to be your new leader. Be aware that the data of that node will be used for the entire cluster so we recommend to use a node with the most up to date state.
-2. Before doing anything else, make a backup of the `log_storage_path` and `snapshot_storage_path` folders of the picked node.
-3. Reconfigure the cluster on all of the nodes you want to use.
-4. Send the four letter command `rcvr` to the node you picked which will move the node to the recovery mode OR stop Keeper instance on the picked node and start it again with the `--force-recovery` argument.
-5. One by one, start Keeper instances on the new nodes making sure that `mntr` returns `follower` for the `zk_server_state` before starting the next one.
-6. While in the recovery mode, the leader node will return error message for `mntr` command until it achieves quorum with the new nodes and refuse any requests from the client and the followers.
-7. After quorum is achieved, the leader node will return to the normal mode of operation, accepting all the requests using Raft - verify with `mntr` which should return `leader` for the `zk_server_state`.
--- a/docs/en/operations/external-authenticators/kerberos.md
+++ b/docs/en/operations/external-authenticators/kerberos.md
@ -113,7 +113,7 @@ Note, that now, once user `my_user` uses `kerberos`, Kerberos must be enabled in

 ### Enabling Kerberos using SQL {#enabling-kerberos-using-sql}

-When [SQL-driven Access Control and Account Management](../access-rights.md#access-control) is enabled in ClickHouse, users identified by Kerberos can also be created using SQL statements.
+When [SQL-driven Access Control and Account Management](/docs/en/guides/sre/user-management/index.md#access-control) is enabled in ClickHouse, users identified by Kerberos can also be created using SQL statements.

 ```sql
 CREATE USER my_user IDENTIFIED WITH kerberos REALM 'EXAMPLE.COM'
--- a/docs/en/operations/external-authenticators/ldap.md
+++ b/docs/en/operations/external-authenticators/ldap.md
@ -112,7 +112,7 @@ At each login attempt, ClickHouse tries to "bind" to the specified DN defined by

 Note, that user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously.

-When SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled, users that are authenticated by LDAP servers can also be created using the [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement) statement.
+When SQL-driven [Access Control and Account Management](/docs/en/guides/sre/user-management/index.md#access-control) is enabled, users that are authenticated by LDAP servers can also be created using the [CREATE USER](/docs/en/sql-reference/statements/create/user.md#create-user-statement) statement.

 Query:

@ -120,11 +120,11 @@ Query:
 CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server';
 ```

-## LDAP Exernal User Directory {#ldap-external-user-directory}
+## LDAP External User Directory {#ldap-external-user-directory}

 In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. To achieve this, specify previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section of the `config.xml` file.

-At each login attempt, ClickHouse tries to find the user definition locally and authenticate it as usual. If the user is not defined, ClickHouse will assume the definition exists in the external LDAP directory and will try to "bind" to the specified DN at the LDAP server using the provided credentials. If successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then be assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled and roles are created using the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement.
+At each login attempt, ClickHouse tries to find the user definition locally and authenticate it as usual. If the user is not defined, ClickHouse will assume the definition exists in the external LDAP directory and will try to "bind" to the specified DN at the LDAP server using the provided credentials. If successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then be assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](/docs/en/guides/sre/user-management/index.md#access-control) is enabled and roles are created using the [CREATE ROLE](/docs/en/sql-reference/statements/create/role.md#create-role-statement) statement.

 **Example**

@ -173,7 +173,7 @@ Note that `my_ldap_server` referred in the `ldap` section inside the `user_direc
 - `roles` — Section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server.
    - If no roles are specified here or assigned during role mapping (below), user will not be able to perform any actions after authentication.
 - `role_mapping` — Section with LDAP search parameters and mapping rules.
-    - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` and the name of the logged-in user. For each entry found during that search, the value of the specified attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the value becomes the name of a local role defined in ClickHouse, which is expected to be created beforehand by the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement.
+    - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` and the name of the logged-in user. For each entry found during that search, the value of the specified attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the value becomes the name of a local role defined in ClickHouse, which is expected to be created beforehand by the [CREATE ROLE](/docs/en/sql-reference/statements/create/role.md#create-role-statement) statement.
    - There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied.
        - `base_dn` — Template used to construct the base DN for the LDAP search.
            - The resulting DN will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{user_dn}` substrings of the template with the actual user name, bind DN, and user DN during each LDAP search.
--- a/docs/en/operations/monitoring.md
+++ b/docs/en/operations/monitoring.md
@ -2,6 +2,7 @@
 slug: /en/operations/monitoring
 sidebar_position: 45
 sidebar_label: Monitoring
+description: You can monitor the utilization of hardware resources and also ClickHouse server metrics.
 ---

 # Monitoring
--- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md
+++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md
@ -7,11 +7,23 @@ import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.m

 # Sampling Query Profiler

-<SelfManaged />
-
 ClickHouse runs sampling profiler that allows analyzing query execution. Using profiler you can find source code routines that used the most frequently during query execution. You can trace CPU time and wall-clock time spent including idle time.

-To use profiler:
+The query profiler is automatically enabled in ClickHouse Cloud, and you can run a sample query as follows:
+
+``` sql
+SELECT
+    count(),
+    arrayStringConcat(arrayMap(x -> concat(demangle(addressToSymbol(x)), '\n    ', addressToLine(x)), trace), '\n') AS sym
+FROM system.trace_log
+WHERE (query_id = 'ebca3574-ad0a-400a-9cbc-dca382f5998c') AND (event_date = today())
+GROUP BY trace
+ORDER BY count() DESC
+LIMIT 10
+SETTINGS allow_introspection_functions = 1
+```
+
+In self-managed deployments, to use the query profiler:

 -   Setup the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration.

--- a/docs/en/operations/query-cache.md
+++ b/docs/en/operations/query-cache.md
@ -85,8 +85,8 @@ make the matching more natural, all query-level settings related to the query ca

 If the query was aborted due to an exception or user cancellation, no entry is written into the query cache.

-The size of the query cache, the maximum number of cache entries and the maximum size of cache entries (in bytes and in records) can
-be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
+The size of the query cache in bytes, the maximum number of cache entries and the maximum size of individual cache entries (in bytes and in
+records) can be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).

 To define how long a query must run at least such that its result can be cached, you can use setting
 [query_cache_min_query_duration](settings/settings.md#query-cache-min-query-duration). For example, the result of query
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@ -2,6 +2,7 @@
 slug: /en/operations/server-configuration-parameters/settings
 sidebar_position: 57
 sidebar_label: Server Settings
+description: This section contains descriptions of server settings that cannot be changed at the session or query level.
 ---

 # Server Settings
@ -275,7 +276,7 @@ Path:
 -   Specify the absolute path or the path relative to the server config file.
 -   The path can contain wildcards \* and ?.

-See also “[Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md)”.
+See also “[Dictionaries](../../sql-reference/dictionaries/index.md)”.

 **Example**

@ -1025,7 +1026,7 @@ If the number of **idle** threads in the Backups IO Thread pool exceeds `max_bac
 Possible values:

 -   Positive integer.
-   Zero. 
+-   Zero.

 Default value: `0`.

@ -1360,7 +1361,7 @@ If the table does not exist, ClickHouse will create it. If the structure of the

 The following settings are available:

-   `size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
+-   `max_size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
 -   `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`.
 -   `max_entry_size`: The maximum size in bytes `SELECT` query results may have to be saved in the cache. Default value: `1048576` (1 MiB).
 -   `max_entry_rows`: The maximum number of rows `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil).
@ -1368,7 +1369,7 @@ The following settings are available:
 Changed settings take effect immediately.

 :::warning
-Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query cache altogether.
+Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `max_size` or disable the query cache altogether.
 :::

 **Example**
@ -1881,6 +1882,16 @@ The update is performed asynchronously, in a separate system thread.
 Manage executing [distributed ddl queries](../../sql-reference/distributed-ddl.md)  (CREATE, DROP, ALTER, RENAME) on cluster.
 Works only if [ZooKeeper](#server-settings_zookeeper) is enabled.

+The configurable settings within `<distributed_ddl>` include:
+
+- **path**: the path in Keeper for the `task_queue` for DDL queries
+- **profile**: the profile used to execute the DDL queries
+- **pool_size**: how many `ON CLUSTER` queries can be run simultaneously
+- **max_tasks_in_queue**: the maximum number of tasks that can be in the queue. Default is 1,000
+- **task_max_lifetime**: delete node if its age is greater than this value. Default is `7 * 24 * 60 * 60` (a week in seconds)
+- **cleanup_delay_period**: cleaning starts after a new node event is received, if the last cleaning was not performed within the last `cleanup_delay_period` seconds. Default is 60 seconds
+
+
 **Example**

 ```xml
@ -1917,7 +1928,7 @@ Default value: `/var/lib/clickhouse/access/`.

 **See also**

- [Access Control and Account Management](../../operations/access-rights.md#access-control)
+- [Access Control and Account Management](../../guides/sre/user-management/index.md#access-control)

 ## user_directories {#user_directories}

--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@ -964,7 +964,7 @@ Default value: 1.

 ### input_format_arrow_import_nested {#input_format_arrow_import_nested}

-Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format.
+Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format.

 Possible values:

@ -1024,7 +1024,7 @@ Default value: `none`.

 ### input_format_orc_import_nested {#input_format_orc_import_nested}

-Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format.
+Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format.

 Possible values:

@ -1073,7 +1073,7 @@ Default value: `none`.

 ### input_format_parquet_import_nested {#input_format_parquet_import_nested}

-Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format.
+Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format.

 Possible values:

@ -1538,6 +1538,6 @@ Default value: `1GiB`.

 ### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion}

-Allow types conversion in Native input format between columns from input data and requested columns. 
+Allow types conversion in Native input format between columns from input data and requested columns.

 Enabled by default.
--- a/docs/en/operations/settings/settings-profiles.md
+++ b/docs/en/operations/settings/settings-profiles.md
@ -9,7 +9,7 @@ sidebar_label: Settings Profiles
 A settings profile is a collection of settings grouped under the same name.

 :::note
-ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing settings profiles. We recommend using it.
+ClickHouse also supports [SQL-driven workflow](../../guides/sre/user-management/index.md#access-control) for managing settings profiles. We recommend using it.
 :::

 The profile can have any name. You can specify the same profile for different users. The most important thing you can write in the settings profile is `readonly=1`, which ensures read-only access.
--- a/docs/en/operations/settings/settings-users.md
+++ b/docs/en/operations/settings/settings-users.md
@ -9,7 +9,7 @@ sidebar_label: User Settings
 The `users` section of the `users.xml` configuration file contains user settings.

 :::note
-ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing users. We recommend using it.
+ClickHouse also supports [SQL-driven workflow](../../guides/sre/user-management/index.md#access-control) for managing users. We recommend using it.
 :::

 Structure of the `users` section:
@ -77,7 +77,7 @@ Password can be specified in plaintext or in SHA256 (hex format).

 ### access_management {#access_management-user-setting}

-This setting enables or disables using of SQL-driven [access control and account management](../../operations/access-rights.md#access-control) for the user.
+This setting enables or disables the use of SQL-driven [access control and account management](../../guides/sre/user-management/index.md#access-control) for the user.

 Possible values:

--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -2999,7 +2999,7 @@ It can be useful when merges are CPU bounded not IO bounded (performing heavy da

 ## max_final_threads {#max-final-threads}

-Sets the maximum number of parallel threads for the `SELECT` query data read phase with the [FINAL](../../sql-reference/statements/select/from.md/#select-from-final) modifier.
+Sets the maximum number of parallel threads for the `SELECT` query data read phase with the [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier.

 Possible values:

@ -3094,9 +3094,9 @@ Possible values:

 Default value: `0`.

-## s3_truncate_on_insert 
+## s3_truncate_on_insert

-Enables or disables truncate before inserts in s3 engine tables. If disabled, an exception will be thrown on insert attempts if an S3 object already exists. 
+Enables or disables truncate before inserts in s3 engine tables. If disabled, an exception will be thrown on insert attempts if an S3 object already exists.

 Possible values:
 - 0 — `INSERT` query appends new data to the end of the file.
@ -3104,9 +3104,9 @@ Possible values:

 Default value: `0`.

-## hdfs_truncate_on_insert 
+## hdfs_truncate_on_insert

-Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists. 
+Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists.

 Possible values:
 - 0 — `INSERT` query appends new data to the end of the file.
@ -3114,11 +3114,11 @@ Possible values:

 Default value: `0`.

-## engine_file_allow_create_multiple_files 
+## engine_file_allow_create_multiple_files

 Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern:

-`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc. 
+`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc.

 Possible values:
 - 0 — `INSERT` query appends new data to the end of the file.
@ -3126,11 +3126,11 @@ Possible values:

 Default value: `0`.

-## s3_create_new_file_on_insert 
+## s3_create_new_file_on_insert

 Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern:

-initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc. 
+initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.

 Possible values:
 - 0 — `INSERT` query appends new data to the end of the file.
@ -3142,7 +3142,7 @@ Default value: `0`.

 Enables or disables creating a new file on each insert in HDFS engine tables. If enabled, on each insert a new HDFS file will be created with the name, similar to this pattern:

-initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc. 
+initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.

 Possible values:
 - 0 — `INSERT` query appends new data to the end of the file.
@ -3438,7 +3438,7 @@ Default value: `throw`.

 ## flatten_nested {#flatten-nested}

-Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns.
+Sets the data format of [nested](../../sql-reference/data-types/nested-data-structures/index.md) columns.

 Possible values:

@ -3753,7 +3753,7 @@ Default value: `1`.

 ## optimize_move_to_prewhere_if_final {#optimize_move_to_prewhere_if_final}

-Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries with [FINAL](../../sql-reference/statements/select/from.md/#select-from-final) modifier.
+Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries with [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier.

 Works only for [*MergeTree](../../engines/table-engines/mergetree-family/index.md) tables.

@ -3770,7 +3770,7 @@ Default value: `0`.

 ## optimize_using_constraints

-Use [constraints](../../sql-reference/statements/create/table#constraints) for query optimization. The default is `false`.
+Use [constraints](../../sql-reference/statements/create/table.md#constraints) for query optimization. The default is `false`.

 Possible values:

@ -3778,7 +3778,7 @@ Possible values:

 ## optimize_append_index

-Use [constraints](../../sql-reference/statements/create/table#constraints) in order to append index condition. The default is `false`.
+Use [constraints](../../sql-reference/statements/create/table.md#constraints) in order to append index condition. The default is `false`.

 Possible values:

@ -3786,7 +3786,7 @@ Possible values:

 ## optimize_substitute_columns

-Use [constraints](../../sql-reference/statements/create/table#constraints) for column substitution. The default is `false`.
+Use [constraints](../../sql-reference/statements/create/table.md#constraints) for column substitution. The default is `false`.

 Possible values:

@ -3984,7 +3984,7 @@ Use this setting only for backward compatibility if your use cases depend on old

 ## final {#final}

-Automatically applies [FINAL](../../sql-reference/statements/select/from/#final-modifier) modifier to all tables in a query, to tables where [FINAL](../../sql-reference/statements/select/from/#final-modifier) is applicable, including joined tables and tables in sub-queries, and 
+Automatically applies [FINAL](../../sql-reference/statements/select/from.md#final-modifier) modifier to all tables in a query, to tables where [FINAL](../../sql-reference/statements/select/from.md#final-modifier) is applicable, including joined tables and tables in sub-queries, and
 distributed tables.

 Possible values:
@ -4030,7 +4030,7 @@ SELECT * FROM test;

 ## asterisk_include_materialized_columns {#asterisk_include_materialized_columns}

-Include [MATERIALIZED](../../sql-reference/statements/create/table/#materialized) columns for wildcard query (`SELECT *`).
+Include [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) columns for wildcard query (`SELECT *`).

 Possible values:

@ -4041,7 +4041,7 @@ Default value: `0`.

 ## asterisk_include_alias_columns {#asterisk_include_alias_columns}

-Include [ALIAS](../../sql-reference/statements/create/table/#alias) columns for wildcard query (`SELECT *`).
+Include [ALIAS](../../sql-reference/statements/create/table.md#alias) columns for wildcard query (`SELECT *`).

 Possible values:

@ -4049,3 +4049,32 @@ Possible values:
 - 1 - enabled

 Default value: `0`.
+
+## stop_reading_on_first_cancel {#stop_reading_on_first_cancel}
+When set to `true` and the user wants to interrupt a query (for example using `Ctrl+C` on the client), then the query continues execution only on data that was already read from the table. Afterward, it will return a partial result of the query for the part of the table that was read. To fully stop the execution of a query without a partial result, the user should send 2 cancel requests.
+
+**Example without setting on Ctrl+C**
+```sql
+SELECT sum(number) FROM numbers(10000000000)
+
+Cancelling query.
+Ok.
+Query was cancelled.
+
+0 rows in set. Elapsed: 1.334 sec. Processed 52.65 million rows, 421.23 MB (39.48 million rows/s., 315.85 MB/s.)
+```
+
+**Example with setting on Ctrl+C**
+```sql
+SELECT sum(number) FROM numbers(10000000000) SETTINGS stop_reading_on_first_cancel=true
+
+┌──────sum(number)─┐
+│ 1355411451286266 │
+└──────────────────┘
+
+1 row in set. Elapsed: 1.331 sec. Processed 52.13 million rows, 417.05 MB (39.17 million rows/s., 313.33 MB/s.)
+```
+
+Possible values: `true`, `false`
+
+Default value: `false`
--- a/docs/en/operations/system-tables/dictionaries.md
+++ b/docs/en/operations/system-tables/dictionaries.md
@ -3,12 +3,12 @@ slug: /en/operations/system-tables/dictionaries
 ---
 # dictionaries

-Contains information about [dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
+Contains information about [dictionaries](../../sql-reference/dictionaries/index.md).

 Columns:

 -   `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database containing the dictionary created by DDL query. Empty string for other dictionaries.
-   `name` ([String](../../sql-reference/data-types/string.md)) — [Dictionary name](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md).
+-   `name` ([String](../../sql-reference/data-types/string.md)) — [Dictionary name](../../sql-reference/dictionaries/index.md).
 -   `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Dictionary UUID.
 -   `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Dictionary status. Possible values:
    -   `NOT_LOADED` — Dictionary was not loaded because it was not used.
@ -18,20 +18,20 @@ Columns:
    -   `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../../sql-reference/statements/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed).
    -   `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now.
 -   `origin` ([String](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary.
-   `type` ([String](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md).
-   `key.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [key names](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key) provided by the dictionary.
-   `key.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [key types](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key) provided by the dictionary.
-   `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [attribute names](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) provided by the dictionary.
-   `attribute.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [attribute types](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) provided by the dictionary.
+-   `type` ([String](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../../sql-reference/dictionaries/index.md#storig-dictionaries-in-memory).
+-   `key.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [key names](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields) provided by the dictionary.
+-   `key.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [key types](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields) provided by the dictionary.
+-   `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [attribute names](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields) provided by the dictionary.
+-   `attribute.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [attribute types](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields) provided by the dictionary.
 -   `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary.
 -   `query_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot.
 -   `hit_rate` ([Float64](../../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache.
 -   `found_rate` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of uses for which the value was found.
 -   `element_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary.
 -   `load_factor` ([Float64](../../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table).
-   `source` ([String](../../sql-reference/data-types/string.md)) — Text describing the [data source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) for the dictionary.
-   `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
-   `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
+-   `source` ([String](../../sql-reference/data-types/string.md)) — Text describing the [data source](../../sql-reference/dictionaries/index.md#dictionary-sources) for the dictionary.
+-   `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../../sql-reference/dictionaries/index.md#dictionary-updates) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
+-   `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../../sql-reference/dictionaries/index.md#dictionary-updates) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
 -   `loading_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary.
 -   `last_successful_update_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with dictionary sources and investigate the causes.
 -   `loading_duration` ([Float32](../../sql-reference/data-types/float.md)) — Duration of a dictionary loading.
--- a/docs/en/operations/system-tables/marked_dropped_tables.md
+++ b/docs/en/operations/system-tables/marked_dropped_tables.md
@ -1,7 +1,7 @@
 ---
-slug: /en/operations/system-tables/marked_dropped_tables
+slug: /en/operations/system-tables/dropped_tables
 ---
-# marked_dropped_tables
+# dropped_tables

 Contains information about tables for which `DROP TABLE` has been executed but data cleanup has not yet been performed.

@ -17,11 +17,11 @@ Columns:

 **Example**

-The following example shows how to get information about marked_dropped_tables.
+The following example shows how to get information about dropped_tables.

 ``` sql
 SELECT *
-FROM system.marked_dropped_tables\G
+FROM system.dropped_tables\G
 ```

 ``` text
--- a/docs/en/operations/system-tables/quotas.md
+++ b/docs/en/operations/system-tables/quotas.md
@ -20,7 +20,7 @@ Columns:
 - `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values:
     - `0` — The quota applies to users specified in the `apply_to_list`.
    - `1` — The quota applies to all users except those listed in `apply_to_except`.
- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/[roles](../../operations/access-rights.md#role-management) that the quota should be applied to.
+- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/[roles](../../guides/sre/user-management/index.md#role-management) that the quota should be applied to.
 - `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/roles that the quota should not apply to.

 ## See Also {#see-also}
--- a/docs/en/operations/system-tables/roles.md
+++ b/docs/en/operations/system-tables/roles.md
@ -3,7 +3,7 @@ slug: /en/operations/system-tables/roles
 ---
 # roles

-Contains information about configured [roles](../../operations/access-rights.md#role-management).
+Contains information about configured [roles](../../guides/sre/user-management/index.md#role-management).

 Columns:

--- a/docs/en/operations/system-tables/users.md
+++ b/docs/en/operations/system-tables/users.md
@ -3,7 +3,7 @@ slug: /en/operations/system-tables/users
 ---
 # users

-Contains a list of [user accounts](../../operations/access-rights.md#user-account-management) configured at the server.
+Contains a list of [user accounts](../../guides/sre/user-management/index.md#user-account-management) configured at the server.

 Columns:
 -    `name` ([String](../../sql-reference/data-types/string.md)) — User name.
--- a/docs/en/operations/tips.md
+++ b/docs/en/operations/tips.md
@ -126,7 +126,7 @@ Otherwise you may get `Illegal instruction` crashes when hypervisor is run on ol

 ## ClickHouse Keeper and ZooKeeper {#zookeeper}

-ClickHouse Keeper is recommended to replace ZooKeeper for ClickHouse clusters.  See the documentation for [ClickHouse Keeper](clickhouse-keeper.md)
+ClickHouse Keeper is recommended as a replacement for ZooKeeper in ClickHouse clusters. See the documentation for [ClickHouse Keeper](../guides/sre/keeper/index.md).

 If you would like to continue using ZooKeeper then it is best to use a fresh version of ZooKeeper – 3.4.9 or later. The version in stable Linux distributions may be outdated.

@ -134,7 +134,7 @@ You should never use manually written scripts to transfer data between different

 If you want to divide an existing ZooKeeper cluster into two, the correct way is to increase the number of its replicas and then reconfigure it as two independent clusters.

-You can run ClickHouse Keeper on the same server as ClickHouse in test environments, or in environments with low ingestion rate. 
+You can run ClickHouse Keeper on the same server as ClickHouse in test environments, or in environments with low ingestion rate.
 For production environments we suggest using separate servers for ClickHouse and ZooKeeper/Keeper, or placing ClickHouse files and Keeper files on separate disks, because ZooKeeper/Keeper are very sensitive to disk latency and ClickHouse may utilize all available system resources.

 You can have ZooKeeper observers in an ensemble but ClickHouse servers should not interact with observers.
--- a/docs/en/operations/utilities/clickhouse-local.md
+++ b/docs/en/operations/utilities/clickhouse-local.md
@ -4,9 +4,9 @@ sidebar_position: 60
 sidebar_label: clickhouse-local
 ---

-# clickhouse-local 
+# clickhouse-local

-The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. It accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/). `clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines. 
+The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. It accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/index.md). `clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines.

 By default `clickhouse-local` has access to data on the same host, and it does not depend on the server's configuration. It also supports loading server configuration using `--config-file` argument. For temporary data, a unique temporary data directory is created by default.

--- a/docs/en/operations/utilities/index.md
+++ b/docs/en/operations/utilities/index.md
@ -1,11 +1,11 @@
 ---
 slug: /en/operations/utilities/
 sidebar_position: 56
-sidebar_label: Overview
+sidebar_label: List of tools and utilities
 pagination_next: 'en/operations/utilities/clickhouse-copier'
 ---

-# ClickHouse Utilities 
+# List of tools and utilities

 -   [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this.
 -   [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster.
--- a/docs/en/sql-reference/_category_.yml
+++ b/docs/en/sql-reference/_category_.yml
@ -1,7 +1,7 @@
-position: 15
+position: 1
 label: 'SQL Reference'
 collapsible: true
 collapsed: true
 link:
-  type: doc
-  id: en/sql-reference/index
+  type: generated-index
+  slug: /en/sql-reference
--- a/docs/en/sql-reference/aggregate-functions/reference/contingency.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/contingency.md
@ -5,7 +5,7 @@ sidebar_position: 350

 # contingency

-The `contingency` function calculates the [contingency coefficient](https://en.wikipedia.org/wiki/Contingency_table#Cram%C3%A9r's_V_and_the_contingency_coefficient_C), a value that measures the association between two columns in a table. The computation is similar to [the `cramersV` function](./cramersv) but with a different denominator in the square root.
+The `contingency` function calculates the [contingency coefficient](https://en.wikipedia.org/wiki/Contingency_table#Cram%C3%A9r's_V_and_the_contingency_coefficient_C), a value that measures the association between two columns in a table. The computation is similar to [the `cramersV` function](./cramersv.md) but with a different denominator in the square root.


 **Syntax**
--- a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md
@ -6,7 +6,7 @@ sidebar_position: 352
 # cramersVBiasCorrected


-Cramér's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramér's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).
+Cramér's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramér's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).



--- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md
@ -19,7 +19,7 @@ Each `value` corresponds to the determinate `timeunit`. The half-life `x` is the
 **Arguments**

 -   `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
-   `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). Timeunit is not timestamp (seconds), it's -- an index of the time interval. Can be calculated using [intDiv](../../functions/arithmetic-functions/#intdiva-b).
+-   `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). Timeunit is not a timestamp (seconds); it is an index of the time interval. Can be calculated using [intDiv](../../functions/arithmetic-functions.md#intdiva-b).

 **Parameters**

--- a/docs/en/sql-reference/data-types/index.md
+++ b/docs/en/sql-reference/data-types/index.md
@ -1,13 +1,33 @@
 ---
 slug: /en/sql-reference/data-types/
-sidebar_label: Data Types
+sidebar_label: List of data types
 sidebar_position: 37
 ---

-# Data Types 
+# ClickHouse Data Types

-ClickHouse can store various kinds of data in table cells.
+ClickHouse can store various kinds of data in table cells. This section describes the supported data types and special considerations for using and/or implementing them, if any.

-This section describes the supported data types and special considerations for using and/or implementing them if any.
+:::note
+You can check whether a data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table.
+:::

-You can check whether data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table.
+ClickHouse data types include:
+
+- **Integer types**: [signed and unsigned integers](./int-uint.md) (`UInt8`, `UInt16`, `UInt32`, `UInt64`, `UInt128`, `UInt256`, `Int8`, `Int16`, `Int32`, `Int64`, `Int128`, `Int256`)
+- **Floating-point numbers**: [floats](./float.md) (`Float32` and `Float64`) and [`Decimal` values](./decimal.md)
+- **Boolean**: ClickHouse has a [`Boolean` type](./boolean.md)
+- **Strings**: [`String`](./string.md) and [`FixedString`](./fixedstring.md)
+- **Dates**: use [`Date`](./date.md) and [`Date32`](./date32.md) for days, and [`DateTime`](./datetime.md) and [`DateTime64`](./datetime64.md) for instants in time
+- **JSON**: the [`JSON` object](./json.md) stores a JSON document in a single column
+- **UUID**: a performant option for storing [`UUID` values](./uuid.md)
+- **Low cardinality types**: use an [`Enum`](./enum.md) when you have a handful of unique values, or use [`LowCardinality`](./lowcardinality.md) when you have up to 10,000 unique values of a column
+- **Arrays**: any column can be defined as an [`Array` of values](./array.md)
+- **Maps**: use [`Map`](./map.md) for storing key/value pairs
+- **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results
+- **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell
+- **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type.
+- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column getting its default value for the data type)
+- **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses
+- **Geo types**: for [geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon`
+- **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md)
--- a/Show More
+++ b/Show More