Merge remote-tracking branch 'upstream/master' into HEAD

Anton Popov 2022-12-20 13:37:23 +00:00
commit 56187c5f0e
155 changed files with 5005 additions and 3294 deletions


@ -7,6 +7,6 @@ assignees: ''
---
> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in [community Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
> If you still prefer GitHub issues, remove all this text and ask your question here.


@ -16,6 +16,7 @@ on: # yamllint disable-line rule:truthy
- 'docker/docs/**'
- 'docs/**'
- 'website/**'
- 'utils/check-style/aspell-ignore/**'
jobs:
CheckLabels:
runs-on: [self-hosted, style-checker]


@ -17,6 +17,7 @@ concurrency:
- 'docs/**'
- 'utils/list-versions/version_date.tsv'
- 'website/**'
- 'utils/check-style/aspell-ignore/**'
workflow_dispatch:
jobs:
DockerHubPushAarch64:


@ -842,7 +842,7 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinAmd64SSE2:
BuilderBinAmd64Compat:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
@ -853,7 +853,7 @@ jobs:
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_amd64sse2
BUILD_NAME=binary_amd64_compat
EOF
- name: Download changed images
uses: actions/download-artifact@v2
@ -1017,7 +1017,7 @@ jobs:
- BuilderBinFreeBSD
# - BuilderBinGCC
- BuilderBinPPC64
- BuilderBinAmd64SSE2
- BuilderBinAmd64Compat
- BuilderBinAarch64V80Compat
- BuilderBinClangTidy
- BuilderDebShared


@ -16,6 +16,7 @@ on: # yamllint disable-line rule:truthy
- 'docker/docs/**'
- 'docs/**'
- 'website/**'
- 'utils/check-style/aspell-ignore/**'
##########################################################################################
##################################### SMALL CHECKS #######################################
##########################################################################################
@ -900,7 +901,7 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinAmd64SSE2:
BuilderBinAmd64Compat:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
@ -911,7 +912,7 @@ jobs:
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_amd64sse2
BUILD_NAME=binary_amd64_compat
EOF
- name: Download changed images
uses: actions/download-artifact@v2
@ -1070,7 +1071,7 @@ jobs:
- BuilderBinFreeBSD
# - BuilderBinGCC
- BuilderBinPPC64
- BuilderBinAmd64SSE2
- BuilderBinAmd64Compat
- BuilderBinAarch64V80Compat
- BuilderBinClangTidy
- BuilderDebShared

.gitignore vendored

@ -159,3 +159,8 @@ website/package-lock.json
tests/queries/0_stateless/test_*
tests/queries/0_stateless/*.binary
tests/queries/0_stateless/*.generated-expect
# rust
/rust/**/target
# It is autogenerated from *.in
/rust/**/.cargo/config.toml

.gitmodules vendored

@ -269,9 +269,6 @@
[submodule "contrib/vectorscan"]
path = contrib/vectorscan
url = https://github.com/VectorCamp/vectorscan.git
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing.git
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/ClickHouse/c-ares
@ -294,3 +291,6 @@
[submodule "contrib/google-benchmark"]
path = contrib/google-benchmark
url = https://github.com/google/benchmark.git
[submodule "contrib/libdivide"]
path = contrib/libdivide
url = https://github.com/ridiculousfish/libdivide.git


@ -609,6 +609,8 @@ if (NATIVE_BUILD_TARGETS
"-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
"-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
"-DENABLE_CCACHE=${ENABLE_CCACHE}"
# Avoid overriding .cargo/config.toml with native toolchain.
"-DENABLE_RUST=OFF"
"-DENABLE_CLICKHOUSE_SELF_EXTRACTING=${ENABLE_CLICKHOUSE_SELF_EXTRACTING}"
${CMAKE_SOURCE_DIR}
WORKING_DIRECTORY "${NATIVE_BUILD_DIR}"


@ -40,6 +40,11 @@ else ()
target_compile_definitions(common PUBLIC WITH_COVERAGE=0)
endif ()
# FIXME: move libraries for line reading out from base
if (TARGET ch_rust::skim)
target_link_libraries(common PUBLIC ch_rust::skim)
endif()
target_include_directories(common PUBLIC .. "${CMAKE_CURRENT_BINARY_DIR}/..")
if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES)


@ -16,9 +16,11 @@
#include <fstream>
#include <filesystem>
#include <fmt/format.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/algorithm/string/classification.hpp> /// is_any_of
#include "config.h" // USE_SKIM
#if USE_SKIM
#include <skim.h>
#endif
namespace
{
@ -39,36 +41,6 @@ std::string getEditor()
return editor;
}
std::pair<std::string, FuzzyFinderType> getFuzzyFinder()
{
const char * env_path = std::getenv("PATH"); // NOLINT(concurrency-mt-unsafe)
if (!env_path || !*env_path)
return {};
std::vector<std::string> paths;
boost::split(paths, env_path, boost::is_any_of(":"));
for (const auto & path_str : paths)
{
std::filesystem::path path(path_str);
std::filesystem::path sk_bin_path = path / "sk";
if (!access(sk_bin_path.c_str(), X_OK))
return {sk_bin_path, FUZZY_FINDER_SKIM};
std::filesystem::path fzf_bin_path = path / "fzf";
if (!access(fzf_bin_path.c_str(), X_OK))
return {fzf_bin_path, FUZZY_FINDER_FZF};
}
return {"", FUZZY_FINDER_NONE};
}
String escapeShellArgument(std::string arg)
{
boost::replace_all(arg, "'", "'\\''");
return fmt::format("'{}'", arg);
}
/// See comments in ShellCommand::executeImpl()
/// (for the vfork via dlsym())
int executeCommand(char * const argv[])
@ -316,8 +288,6 @@ ReplxxLineReader::ReplxxLineReader(
using namespace std::placeholders;
using Replxx = replxx::Replxx;
std::tie(fuzzy_finder, fuzzy_finder_type) = getFuzzyFinder();
if (!history_file_path.empty())
{
history_file_fd = open(history_file_path.c_str(), O_RDWR);
@ -422,17 +392,48 @@ ReplxxLineReader::ReplxxLineReader(
};
rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action);
/// interactive search in history (requires fzf/sk)
if (fuzzy_finder_type != FUZZY_FINDER_NONE)
#if USE_SKIM
auto interactive_history_search = [this](char32_t code)
{
auto interactive_history_search = [this](char32_t code)
std::vector<std::string> words;
{
openInteractiveHistorySearch();
rx.invoke(Replxx::ACTION::CLEAR_SELF, code);
return rx.invoke(Replxx::ACTION::REPAINT, code);
};
rx.bind_key(Replxx::KEY::control('R'), interactive_history_search);
}
auto hs(rx.history_scan());
while (hs.next())
words.push_back(hs.get().text());
}
std::string new_query;
try
{
new_query = std::string(skim(words));
}
catch (const std::exception & e)
{
rx.print("skim failed: %s (consider using Ctrl-T for a regular non-fuzzy reverse search)\n", e.what());
}
if (!new_query.empty())
rx.set_state(replxx::Replxx::State(new_query.c_str(), static_cast<int>(new_query.size())));
if (bracketed_paste_enabled)
enableBracketedPaste();
rx.invoke(Replxx::ACTION::CLEAR_SELF, code);
return rx.invoke(Replxx::ACTION::REPAINT, code);
};
rx.bind_key(Replxx::KEY::control('R'), interactive_history_search);
/// Rebind regular incremental search to C-T.
///
/// NOTE: C-T is by default a binding to swap adjacent chars
/// (TRANSPOSE_CHARACTERS), but for SQL it sounds pretty useless.
rx.bind_key(Replxx::KEY::control('T'), [this](char32_t)
{
/// Reverse search is detected by C-R.
uint32_t reverse_search = Replxx::KEY::control('R');
return rx.invoke(Replxx::ACTION::HISTORY_INCREMENTAL_SEARCH, reverse_search);
});
#endif
}
ReplxxLineReader::~ReplxxLineReader()
@ -501,65 +502,6 @@ void ReplxxLineReader::openEditor()
enableBracketedPaste();
}
void ReplxxLineReader::openInteractiveHistorySearch()
{
assert(!fuzzy_finder.empty());
TemporaryFile history_file("clickhouse_client_history_in_XXXXXX.bin");
auto hs(rx.history_scan());
while (hs.next())
{
history_file.write(hs.get().text());
history_file.write(std::string(1, '\0'));
}
history_file.close();
TemporaryFile output_file("clickhouse_client_history_out_XXXXXX.sql");
output_file.close();
char sh[] = "sh";
char sh_c[] = "-c";
/// NOTE: You can use one of the following to configure the behaviour additionally:
/// - SKIM_DEFAULT_OPTIONS
/// - FZF_DEFAULT_OPTS
///
/// And also note, that fzf and skim is 95% compatible (at least option
/// that is used here)
std::string fuzzy_finder_command = fmt::format("{} --read0 --height=30%", fuzzy_finder);
switch (fuzzy_finder_type)
{
case FUZZY_FINDER_SKIM:
fuzzy_finder_command += " --tac --tiebreak=-score";
break;
case FUZZY_FINDER_FZF:
fuzzy_finder_command += " --tac --tiebreak=index";
break;
case FUZZY_FINDER_NONE:
/// assertion for !fuzzy_finder.empty() is enough
break;
}
fuzzy_finder_command += fmt::format(" < {} > {}",
escapeShellArgument(history_file.getPath()),
escapeShellArgument(output_file.getPath()));
char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr};
try
{
if (executeCommand(argv) == 0)
{
std::string new_query = readFile(output_file.getPath());
rightTrim(new_query);
rx.set_state(replxx::Replxx::State(new_query.c_str(), static_cast<int>(new_query.size())));
}
}
catch (const std::runtime_error & e)
{
rx.print(e.what());
}
if (bracketed_paste_enabled)
enableBracketedPaste();
}
void ReplxxLineReader::enableBracketedPaste()
{
bracketed_paste_enabled = true;


@ -4,15 +4,6 @@
#include <replxx.hxx>
enum FuzzyFinderType
{
FUZZY_FINDER_NONE,
/// Use https://github.com/junegunn/fzf
FUZZY_FINDER_FZF,
/// Use https://github.com/lotabout/skim
FUZZY_FINDER_SKIM,
};
class ReplxxLineReader : public LineReader
{
public:
@ -35,7 +26,6 @@ private:
void addToHistory(const String & line) override;
int executeEditor(const std::string & path);
void openEditor();
void openInteractiveHistorySearch();
replxx::Replxx rx;
replxx::Replxx::highlighter_callback_t highlighter;
@ -45,6 +35,4 @@ private:
bool bracketed_paste_enabled = false;
std::string editor;
std::string fuzzy_finder;
FuzzyFinderType fuzzy_finder_type = FUZZY_FINDER_NONE;
};


@ -65,7 +65,7 @@ add_contrib (dragonbox-cmake dragonbox)
add_contrib (vectorscan-cmake vectorscan)
add_contrib (jemalloc-cmake jemalloc)
add_contrib (libcpuid-cmake libcpuid)
add_contrib (libdivide)
add_contrib (libdivide-cmake)
add_contrib (libmetrohash)
add_contrib (lz4-cmake lz4)
add_contrib (murmurhash)


@ -10,9 +10,6 @@ else()
endif()
option(ENABLE_RUST "Enable rust" ${DEFAULT_ENABLE_RUST})
message(STATUS ${ENABLE_RUST})
if(NOT ENABLE_RUST)
message(STATUS "Not using rust")
return()
@ -42,5 +39,7 @@ endif()
message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}")
# FindRust.cmake
list(APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake")
# Define function corrosion_import_crate()
include ("${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake/Corrosion.cmake")

contrib/libdivide vendored Submodule

@ -0,0 +1 @@
Subproject commit 3bd34388573681ce563348cdf04fe15d24770d04


@ -0,0 +1,7 @@
set(LIBDIVIDE_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libdivide")
add_library (_libdivide INTERFACE)
# for libdivide.h
target_include_directories (_libdivide SYSTEM BEFORE INTERFACE ${LIBDIVIDE_SOURCE_DIR})
# for libdivide-config.h
target_include_directories (_libdivide SYSTEM BEFORE INTERFACE .)
add_library (ch_contrib::libdivide ALIAS _libdivide)


@ -0,0 +1,9 @@
#if defined(__SSE2__)
# define LIBDIVIDE_SSE2
#elif defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX512VL__)
# define LIBDIVIDE_AVX512
#elif defined(__AVX2__)
# define LIBDIVIDE_AVX2
#elif defined(__aarch64__) && defined(__ARM_NEON)
# define LIBDIVIDE_NEON
#endif


@ -1,3 +0,0 @@
add_library (_libdivide INTERFACE)
target_include_directories (_libdivide SYSTEM BEFORE INTERFACE .)
add_library (ch_contrib::libdivide ALIAS _libdivide)


@ -1,20 +0,0 @@
libdivide
Copyright (C) 2010 ridiculous_fish
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
libdivide@ridiculousfish.com


@ -1,2 +0,0 @@
https://github.com/ridiculousfish/libdivide
http://libdivide.com/

File diff suppressed because it is too large.


@ -55,7 +55,8 @@ ccache --zero-stats ||:
if [ "$BUILD_MUSL_KEEPER" == "1" ]
then
# build keeper with musl separately
cmake --debug-trycompile -DBUILD_STANDALONE_KEEPER=1 -DENABLE_CLICKHOUSE_KEEPER=1 -DCMAKE_VERBOSE_MAKEFILE=1 -DUSE_MUSL=1 -LA -DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-x86_64-musl.cmake "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
# and without rust bindings
cmake --debug-trycompile -DENABLE_RUST=OFF -DBUILD_STANDALONE_KEEPER=1 -DENABLE_CLICKHOUSE_KEEPER=1 -DCMAKE_VERBOSE_MAKEFILE=1 -DUSE_MUSL=1 -LA -DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-x86_64-musl.cmake "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
ninja $NINJA_FLAGS clickhouse-keeper


@ -131,7 +131,7 @@ def parse_env_variables(
ARM_V80COMPAT_SUFFIX = "-aarch64-v80compat"
FREEBSD_SUFFIX = "-freebsd"
PPC_SUFFIX = "-ppc64le"
AMD64_SSE2_SUFFIX = "-amd64sse2"
AMD64_COMPAT_SUFFIX = "-amd64-compat"
result = []
result.append("OUTPUT_DIR=/output")
@ -144,7 +144,7 @@ def parse_env_variables(
is_cross_arm_v80compat = compiler.endswith(ARM_V80COMPAT_SUFFIX)
is_cross_ppc = compiler.endswith(PPC_SUFFIX)
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
is_amd64_sse2 = compiler.endswith(AMD64_SSE2_SUFFIX)
is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX)
if is_cross_darwin:
cc = compiler[: -len(DARWIN_SUFFIX)]
@ -197,8 +197,8 @@ def parse_env_variables(
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
)
elif is_amd64_sse2:
cc = compiler[: -len(AMD64_SSE2_SUFFIX)]
elif is_amd64_compat:
cc = compiler[: -len(AMD64_COMPAT_SUFFIX)]
result.append("DEB_ARCH=amd64")
cmake_flags.append("-DNO_SSE3_OR_HIGHER=1")
else:
@ -358,7 +358,7 @@ if __name__ == "__main__":
"clang-15-aarch64",
"clang-15-aarch64-v80compat",
"clang-15-ppc64le",
"clang-15-amd64sse2",
"clang-15-amd64-compat",
"clang-15-freebsd",
"gcc-11",
),


@ -116,6 +116,7 @@ function clone_submodules
contrib/base64
contrib/cctz
contrib/libcpuid
contrib/libdivide
contrib/double-conversion
contrib/llvm-project
contrib/lz4


@ -12,6 +12,10 @@ echo '{
"registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
}' | dd of=/etc/docker/daemon.json 2>/dev/null
# In case a test hangs, it is convenient to use pytest --pdb to debug it,
# and on a hang you can simply press Ctrl-C and it will spawn a Python pdb,
# but on SIGINT dockerd will exit, so ignore it to preserve the daemon.
trap '' INT
dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log &
set +e


@ -17,6 +17,7 @@ ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
ENV DATASETS="hits visits"
RUN npm install -g azurite
RUN npm install tslib
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]


@ -80,6 +80,7 @@ ENV MINIO_ROOT_PASSWORD="clickhouse"
ENV EXPORT_S3_STORAGE_POLICIES=1
RUN npm install -g azurite
RUN npm install tslib
COPY run.sh /
COPY setup_minio.sh /


@ -19,6 +19,7 @@ def process_result(result_folder):
"typos",
"whitespaces",
"workflows",
"submodules",
"docs spelling",
)


@ -10,7 +10,7 @@ echo "Check style" | ts
echo "Check python formatting with black" | ts
./check-black -n |& tee /test_output/black_output.txt
echo "Check python type hinting with mypy" | ts
./check-mypy -n |& tee /test_output/mypy_output.txt
./check-mypy -n |& tee /test_output/mypy_output.txt
echo "Check typos" | ts
./check-typos |& tee /test_output/typos_output.txt
echo "Check docs spelling" | ts
@ -19,6 +19,8 @@ echo "Check whitespaces" | ts
./check-whitespaces -n |& tee /test_output/whitespaces_output.txt
echo "Check workflows" | ts
./check-workflows |& tee /test_output/workflows_output.txt
echo "Check submodules" | ts
./check-submodules |& tee /test_output/submodules_output.txt
echo "Check shell scripts with shellcheck" | ts
./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt
/process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv


@ -10,6 +10,10 @@ cat > /etc/docker/daemon.json << EOF
}
EOF
# In case a test hangs, it is convenient to use pytest --pdb to debug it,
# and on a hang you can simply press Ctrl-C and it will spawn a Python pdb,
# but on SIGINT dockerd will exit, so ignore it to preserve the daemon.
trap '' INT
dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 &>/var/log/somefile &
set +e


@ -9,14 +9,22 @@ if [ "${OS}" = "Linux" ]
then
if [ "${ARCH}" = "x86_64" -o "${ARCH}" = "amd64" ]
then
DIR="amd64"
# Require at least x86-64 + SSE4.2 (introduced in 2006). On older hardware fall back to plain x86-64 (introduced in 1999) which
# guarantees at least SSE2. The caveat is that plain x86-64 builds are much less tested than SSE 4.2 builds.
HAS_SSE42=$(grep sse4_2 /proc/cpuinfo)
if [ "${HAS_SSE42}" ]
then
DIR="amd64"
else
DIR="amd64compat"
fi
elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ]
then
# If the system has >=ARMv8.2 (https://en.wikipedia.org/wiki/AArch64), choose the corresponding build, else fall back to a v8.0
# compat build. Unfortunately, the ARM ISA level cannot be read directly, we need to guess from the "features" in /proc/cpuinfo.
# Also, the flags in /proc/cpuinfo are named differently than the flags passed to the compiler (cmake/cpu_features.cmake).
ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/ && /lrcpc/')
if [ "${ARMV82}" ]
HAS_ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/ && /lrcpc/')
if [ "${HAS_ARMV82}" ]
then
DIR="aarch64"
else


@ -33,6 +33,13 @@ On Ubuntu/Debian you can use the automatic installation script (check [official
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
```
Note: in case of trouble, you can also use this:
```bash
sudo apt-get install software-properties-common
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
```
For other Linux distributions, check the availability of the [prebuilt packages](https://releases.llvm.org/download.html) or build clang [from sources](https://clang.llvm.org/get_started.html).
#### Use the latest clang for Builds


@ -8,6 +8,10 @@ sidebar_label: Buffer
Buffers the data to write in RAM, periodically flushing it to another table. During the read operation, data is read from the buffer and the other table simultaneously.
:::note
A recommended alternative to the Buffer Table Engine is enabling [asynchronous inserts](/docs/en/guides/best-practices/asyncinserts.md).
:::
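For context, a minimal sketch of the asynchronous-inserts alternative mentioned in the note above, assuming the `async_insert` and `wait_for_async_insert` settings described in the async inserts guide (table name and values are placeholders):

``` sql
-- Sketch only: buffer inserts on the server side instead of using a Buffer table.
INSERT INTO my_table
SETTINGS async_insert = 1, wait_for_async_insert = 0
VALUES (1, 'example');
```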
``` sql
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
```
@ -24,7 +28,7 @@ Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_
#### num_layers
`num_layers` Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers. Recommended value: 16.
`num_layers` Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers.
#### min_time, max_time, min_rows, max_rows, min_bytes, and max_bytes
@ -34,11 +38,11 @@ Conditions for flushing data from the buffer.
#### flush_time, flush_rows, and flush_bytes
Conditions for flushing data from the buffer, that will happen only in background (omitted or zero means no `flush*` parameters).
Conditions for flushing data from the buffer in the background (omitted or zero means no `flush*` parameters).
Data is flushed from the buffer and written to the destination table if all the `min*` conditions or at least one `max*` condition are met.
Also, if at least one `flush*` condition are met flush initiated in background, this is different from `max*`, since `flush*` allows you to configure background flushes separately to avoid adding latency for `INSERT` (into `Buffer`) queries.
Also, if at least one `flush*` condition is met, a flush is initiated in the background. This differs from `max*` since `flush*` allows you to configure background flushes separately to avoid adding latency for `INSERT` queries into Buffer tables.
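As an illustration, a hedged sketch of a Buffer table that also sets the optional trailing `flush_time`, `flush_rows`, and `flush_bytes` parameters (values are arbitrary; this assumes the flush thresholds are passed as optional arguments after `max_bytes`):

``` sql
-- Sketch: arguments are, in order, num_layers, min_time, max_time, min_rows, max_rows,
-- min_bytes, max_bytes, and the optional flush_time, flush_rows, flush_bytes.
CREATE TABLE merge.hits_buffer AS merge.hits
ENGINE = Buffer(merge, hits, 1, 10, 100, 10000, 1000000, 10000000, 100000000, 5, 500000, 50000000)
```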
#### min_time, max_time, and flush_time
@ -52,48 +56,54 @@ Condition for the number of rows in the buffer.
Condition for the number of bytes in the buffer.
During the write operation, data is inserted to a `num_layers` number of random buffers. Or, if the data part to insert is large enough (greater than `max_rows` or `max_bytes`), it is written directly to the destination table, omitting the buffer.
During the write operation, data is inserted into one or more random buffers (configured with `num_layers`). Or, if the data part to insert is large enough (greater than `max_rows` or `max_bytes`), it is written directly to the destination table, omitting the buffer.
The conditions for flushing the data are calculated separately for each of the `num_layers` buffers. For example, if `num_layers = 16` and `max_bytes = 100000000`, the maximum RAM consumption is 1.6 GB.
Example:
``` sql
CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)
CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 1, 10, 100, 10000, 1000000, 10000000, 100000000)
```
Creating a `merge.hits_buffer` table with the same structure as `merge.hits` and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the merge.hits table. 16 buffers are created. The data in each of them is flushed if either 100 seconds have passed, or one million rows have been written, or 100 MB of data have been written; or if simultaneously 10 seconds have passed and 10,000 rows and 10 MB of data have been written. For example, if just one row has been written, after 100 seconds it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner.
Creating a `merge.hits_buffer` table with the same structure as `merge.hits` and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the merge.hits table. A single buffer is created and the data is flushed if either:
- 100 seconds have passed since the last flush (`max_time`) or
- 1 million rows have been written (`max_rows`) or
- 100 MB of data have been written (`max_bytes`) or
- 10 seconds have passed (`min_time`) and 10,000 rows (`min_rows`) and 10 MB (`min_bytes`) of data have been written
When the server is stopped, with `DROP TABLE` or `DETACH TABLE`, buffer data is also flushed to the destination table.
For example, if just one row has been written, after 100 seconds, it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner.
When the server is stopped, with `DROP TABLE` or `DETACH TABLE`, buffered data is also flushed to the destination table.
You can set empty strings in single quotation marks for the database and table name. This indicates the absence of a destination table. In this case, when the data flush conditions are reached, the buffer is simply cleared. This may be useful for keeping a window of data in memory.
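A hedged sketch of that case, with an illustrative column set:

``` sql
-- Sketch: no destination table; flushed data is discarded, so the table keeps
-- only a sliding window of recent rows in memory.
CREATE TABLE window_buffer (event_time DateTime, value UInt64)
ENGINE = Buffer('', '', 1, 10, 100, 10000, 1000000, 10000000, 100000000)
```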
When reading from a Buffer table, data is processed both from the buffer and from the destination table (if there is one).
Note that the Buffer tables does not support an index. In other words, data in the buffer is fully scanned, which might be slow for large buffers. (For data in a subordinate table, the index that it supports will be used.)
Note that the Buffer table does not support an index. In other words, data in the buffer is fully scanned, which might be slow for large buffers. (For data in a subordinate table, the index that it supports will be used.)
If the set of columns in the Buffer table does not match the set of columns in a subordinate table, a subset of columns that exist in both tables is inserted.
If the types do not match for one of the columns in the Buffer table and a subordinate table, an error message is entered in the server log, and the buffer is cleared.
The same thing happens if the subordinate table does not exist when the buffer is flushed.
The same happens if the subordinate table does not exist when the buffer is flushed.
:::warning
Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table.
Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating is the only option. Check that this error is fixed in your release before trying to run ALTER on the Buffer table.
:::
If the server is restarted abnormally, the data in the buffer is lost.
`FINAL` and `SAMPLE` do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table.
`FINAL` and `SAMPLE` do not work correctly for Buffer tables. These conditions are passed to the destination table but are not used for processing data in the buffer. If these features are required, we recommend only using the Buffer table for writing while reading from the destination table.
When adding data to a Buffer, one of the buffers is locked. This causes delays if a read operation is simultaneously being performed from the table.
When adding data to a Buffer table, one of the buffers is locked. This causes delays if a read operation is simultaneously being performed from the table.
Data that is inserted to a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set `num_layers` to 1.
Data that is inserted into a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set `num_layers` to 1.
If the destination table is replicated, some expected characteristics of replicated tables are lost when writing to a Buffer table. The random changes to the order of rows and sizes of data parts cause data deduplication to quit working, which means it is not possible to have a reliable exactly once write to replicated tables.
Due to these disadvantages, we can only recommend using a Buffer table in rare cases.
A Buffer table is used when too many INSERTs are received from a large number of servers over a unit of time and data cant be buffered before insertion, which means the INSERTs cant run fast enough.
A Buffer table is used when too many INSERTs are received from a large number of servers over a unit of time, and data cant be buffered before insertion, which means the INSERTs cant run fast enough.
Note that it does not make sense to insert data one row at a time, even for Buffer tables. This will only produce a speed of a few thousand rows per second, while inserting larger blocks of data can produce over a million rows per second (see the section “Performance”).
Note that it does not make sense to insert data one row at a time, even for Buffer tables. This will only produce a speed of a few thousand rows per second while inserting larger blocks of data can produce over a million rows per second.
[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/buffer/) <!--hide-->


@ -6,10 +6,11 @@ slug: /en/install
# Installing ClickHouse
You have two options for getting up and running with ClickHouse:
You have three options for getting up and running with ClickHouse:
- **[ClickHouse Cloud](https://clickhouse.com/cloud/):** the official ClickHouse as a service, - built by, maintained, and supported by the creators of ClickHouse
- **[Self-managed ClickHouse](https://github.com/ClickHouse/ClickHouse):** ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture
- **[ClickHouse Cloud](https://clickhouse.com/cloud/):** The official ClickHouse as a service, built by, maintained, and supported by the creators of ClickHouse
- **[Self-managed ClickHouse](#self-managed-install):** ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86-64, ARM, or PowerPC64LE CPU architecture
- **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** Read the guide with the official image in Docker Hub
## ClickHouse Cloud
@ -22,73 +23,49 @@ The quickest and easiest way to get up and running with ClickHouse is to create
Once your Cloud service is provisioned, you will be able to [connect to it](/docs/en/integrations/connect-a-client.md) and start [inserting data](/docs/en/integrations/data-ingestion.md).
:::note
The [Quick Start](/docs/en/quick-start.mdx) walks through the steps to get a ClickHouse Cloud service up and running, connecting to it, and inserting data.
:::
## Self-Managed Requirements
### CPU Architecture
ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
Official pre-built binaries are typically compiled for x86_64 and leverage SSE 4.2 instruction set, so unless otherwise stated usage of CPU that supports it becomes an additional system requirement. Heres the command to check if current CPU has support for SSE 4.2:
``` bash
$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
```
To run ClickHouse on processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should [build ClickHouse from sources](#from-sources) with proper configuration adjustments.
ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz.
It is recommended to use **Turbo Boost** and **hyper-threading** technologies. It significantly improves performance with a typical workload.
### RAM {#ram}
We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries.
The required volume of RAM depends on:
- The complexity of queries.
- The amount of data that is processed in queries.
To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](/docs/en/sql-reference/statements/select/distinct.md#select-distinct), [JOIN](/docs/en/sql-reference/statements/select/join.md#select-join) and other operations you use.
ClickHouse can use external memory for temporary data. See [GROUP BY in External Memory](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details.
### Swap File {#swap-file}
Disable the swap file for production environments.
### Storage Subsystem {#storage-subsystem}
You need to have 2GB of free disk space to install ClickHouse.
The volume of storage required for your data should be calculated separately. Assessment should include:
- Estimation of the data volume.
You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store.
- The data compression coefficient.
To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times.
To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas.
### Network {#network}
If possible, use networks of 10G or higher class.
The network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes.
### Software {#software}
ClickHouse is developed primarily for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system.
## Self-Managed Install
1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable:
```bash
curl https://clickhouse.com/ | sh
```
1. Run the `install` command, which defines a collection of useful symlinks along with the files and folders used by ClickHouse - all of which you can see in the output of the install script:
```bash
sudo ./clickhouse install
```
1. At the end of the install script, you are prompted for a password for the `default` user. Feel free to enter a password, or you can optionally leave it blank:
```response
Creating log directory /var/log/clickhouse-server.
Creating data directory /var/lib/clickhouse.
Creating pid directory /var/run/clickhouse-server.
chown -R clickhouse:clickhouse '/var/log/clickhouse-server'
chown -R clickhouse:clickhouse '/var/run/clickhouse-server'
chown clickhouse:clickhouse '/var/lib/clickhouse'
Enter password for default user:
```
You should see the following output:
```response
ClickHouse has been successfully installed.
Start clickhouse-server with:
sudo clickhouse start
Start clickhouse-client with:
clickhouse-client
```
1. Run the following command to start the ClickHouse server:
```bash
sudo clickhouse start
```
:::tip
The [Quick Start](/docs/en/quick-start.mdx/#step-1-get-clickhouse) walks through the steps to download and run ClickHouse, connect to it, and insert data.
:::
## Available Installation Options {#available-installation-options}
### From DEB Packages {#install-from-deb-packages}
@ -278,50 +255,16 @@ For production environments, its recommended to use the latest `stable`-versi
To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/clickhouse/clickhouse-server/). Those images use official `deb` packages inside.
### Single Binary {#from-single-binary}
You can install ClickHouse on Linux using a single portable binary from the latest commit of the `master` branch: [https://builds.clickhouse.com/master/amd64/clickhouse].
``` bash
curl -O 'https://builds.clickhouse.com/master/amd64/clickhouse' && chmod a+x clickhouse
sudo ./clickhouse install
```
### From Precompiled Binaries for Non-Standard Environments {#from-binaries-non-linux}
For non-Linux operating systems and for AArch64 CPU architecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay).
- [MacOS x86_64](https://builds.clickhouse.com/master/macos/clickhouse)
```bash
curl -O 'https://builds.clickhouse.com/master/macos/clickhouse' && chmod a+x ./clickhouse
```
- [MacOS Aarch64 (Apple Silicon)](https://builds.clickhouse.com/master/macos-aarch64/clickhouse)
```bash
curl -O 'https://builds.clickhouse.com/master/macos-aarch64/clickhouse' && chmod a+x ./clickhouse
```
- [FreeBSD x86_64](https://builds.clickhouse.com/master/freebsd/clickhouse)
```bash
curl -O 'https://builds.clickhouse.com/master/freebsd/clickhouse' && chmod a+x ./clickhouse
```
- [Linux AArch64](https://builds.clickhouse.com/master/aarch64/clickhouse)
```bash
curl -O 'https://builds.clickhouse.com/master/aarch64/clickhouse' && chmod a+x ./clickhouse
```
Run `sudo ./clickhouse install` to install ClickHouse system-wide (also with needed configuration files, configuring users etc.). Then run `sudo clickhouse start` commands to start the clickhouse-server and `clickhouse-client` to connect to it.
Use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data.
### From Sources {#from-sources}
To manually compile ClickHouse, follow the instructions for [Linux](/docs/en/development/build.md) or [Mac OS X](/docs/en/development/build-osx.md).
You can compile packages and install them or use programs without installing packages. Also by building manually you can disable SSE 4.2 requirement or build for AArch64 CPUs.
You can compile packages and install them or use programs without installing packages.
Client: programs/clickhouse-client
Server: programs/clickhouse-server
Client: <build_directory>/programs/clickhouse-client
Server: <build_directory>/programs/clickhouse-server
Youll need to create a data and metadata folders and `chown` them for the desired user. Their paths can be changed in server config (src/programs/server/config.xml), by default they are:
Youll need to create data and metadata folders manually and `chown` them for the desired user. Their paths can be changed in server config (src/programs/server/config.xml), by default they are:
/var/lib/clickhouse/data/default/
/var/lib/clickhouse/metadata/default/
@ -406,3 +349,42 @@ SELECT 1
**Congratulations, the system works!**
To continue experimenting, you can download one of the test data sets or go through [tutorial](/docs/en/tutorial.md).
## Recommendations for Self-Managed ClickHouse
ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86-64, ARM, or PowerPC64LE CPU architecture.
ClickHouse uses all hardware resources available to process data.
ClickHouse tends to work more efficiently with a large number of cores at a lower clock rate than with fewer cores at a higher clock rate.
We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but queries will then frequently abort.
The required volume of RAM generally depends on:
- The complexity of queries.
- The amount of data that is processed in queries.
To calculate the required volume of RAM, you may estimate the size of temporary data for [GROUP BY](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](/docs/en/sql-reference/statements/select/distinct.md#select-distinct), [JOIN](/docs/en/sql-reference/statements/select/join.md#select-join) and other operations you use.
To reduce memory consumption, ClickHouse can swap temporary data to external storage. See [GROUP BY in External Memory](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details.
We recommend disabling the operating system's swap file in production environments.
The ClickHouse binary requires at least 2.5 GB of disk space for installation.
The volume of storage required for your data may be calculated separately based on:
- An estimation of the data volume.
You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store.
- The data compression coefficient.
To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times.
To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas.
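As a rough illustration of estimating the compression coefficient after loading a sample, one can compare the compressed and uncompressed sizes reported in `system.parts` (database and table names are placeholders):

``` sql
SELECT
    sum(data_uncompressed_bytes) AS uncompressed,
    sum(data_compressed_bytes) AS compressed,
    round(uncompressed / compressed, 2) AS compression_ratio
FROM system.parts
WHERE active AND database = 'default' AND table = 'sample_data'
```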
For distributed ClickHouse deployments (clustering), we recommend at least 10G class network connectivity.
Network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes.


@ -1415,7 +1415,7 @@ Differs from [PrettySpaceNoEscapes](#prettyspacenoescapes) in that up to 10,000
## RowBinary {#rowbinary}
Formats and parses data by row in binary format. Rows and values are listed consecutively, without separators.
Formats and parses data by row in binary format. Rows and values are listed consecutively, without separators. Because the data is in binary format, the delimiter after `FORMAT RowBinary` is strictly specified as follows: any number of whitespace characters (`' '` - space, code `0x20`; `'\t'` - tab, code `0x09`; `'\f'` - form feed, code `0x0C`) followed by exactly one newline sequence (Windows-style `"\r\n"` or Unix-style `'\n'`), immediately followed by the binary data.
This format is less efficient than the Native format since it is row-based.
Integers use fixed-length little-endian representation. For example, UInt64 uses 8 bytes.
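As a hedged usage sketch (table name and file path are placeholders; `INTO OUTFILE` and `FROM INFILE` are assumed to be run from clickhouse-client):

``` sql
-- Export rows in RowBinary, then load them back.
SELECT * FROM test.events INTO OUTFILE 'events.bin' FORMAT RowBinary;
INSERT INTO test.events FROM INFILE 'events.bin' FORMAT RowBinary;
```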


@ -948,7 +948,7 @@ $ watch -n1 "clickhouse-client --query='SELECT event, value FROM system.events F
## RowBinary {#rowbinary}
Форматирует и парсит данные по строкам, в бинарном виде. Строки и значения уложены подряд, без разделителей.
Форматирует и парсит данные по строкам, в бинарном виде. Строки и значения уложены подряд, без разделителей. Так как данные представлены в бинарном виде, разделитель после `FORMAT RowBinary` строго определен в следующем виде: любое количество пробелов (`' '` - space, код `0x20`; `'\t'` - tab, код `0x09`; `'\f'` - form feed, код `0x0C`), следующая за этим одна последовательность конца строки (Windows style `"\r\n"` или Unix style `'\n'`), и непосредственно следующие за этим бинарные данные.
Формат менее эффективен, чем формат Native, так как является строковым.
Числа представлены в little endian формате фиксированной длины. Для примера, UInt64 занимает 8 байт.


@ -11,8 +11,8 @@ Wants=time-sync.target
[Service]
Type=notify
# Switching off watchdog is very important for sd_notify to work correctly.
Environment=CLICKHOUSE_WATCHDOG_ENABLE=0
# NOTE: we leave clickhouse watchdog process enabled to be able to see OOM/SIGKILL traces in clickhouse-server.log files.
# If you wish to disable the watchdog and rely on systemd logs just add "Environment=CLICKHOUSE_WATCHDOG_ENABLE=0" line.
User=clickhouse
Group=clickhouse
Restart=always


@ -70,6 +70,8 @@
#include <QueryPipeline/ConnectionCollector.h>
#include <Dictionaries/registerDictionaries.h>
#include <Disks/registerDisks.h>
#include <IO/Resource/registerSchedulerNodes.h>
#include <IO/Resource/registerResourceManagers.h>
#include <Common/Config/ConfigReloader.h>
#include <Server/HTTPHandlerFactory.h>
#include "MetricsTransmitter.h"
@ -287,7 +289,6 @@ namespace ErrorCodes
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
extern const int NETWORK_ERROR;
extern const int CORRUPTED_DATA;
extern const int SYSTEM_ERROR;
}
@ -661,51 +662,6 @@ static void sanityChecks(Server & server)
}
}
#if defined(OS_LINUX)
/// Sends notification to systemd, analogous to sd_notify from libsystemd
static void systemdNotify(const std::string_view & command)
{
const char * path = getenv("NOTIFY_SOCKET"); // NOLINT(concurrency-mt-unsafe)
if (path == nullptr)
return; /// not using systemd
int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
if (s == -1)
throwFromErrno("Can't create UNIX socket for systemd notify.", ErrorCodes::SYSTEM_ERROR);
SCOPE_EXIT({ close(s); });
const size_t len = strlen(path);
struct sockaddr_un addr;
addr.sun_family = AF_UNIX;
if (len < 2 || len > sizeof(addr.sun_path) - 1)
throw Exception(ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path);
memcpy(addr.sun_path, path, len + 1); /// write last zero as well.
size_t addrlen = offsetof(struct sockaddr_un, sun_path) + len;
/// '@' meass this is Linux abstract socket, per documentation it must be sun_path[0] must be set to '\0' for it.
if (path[0] == '@')
addr.sun_path[0] = 0;
else if (path[0] == '/')
addrlen += 1; /// non-abstract-addresses should be zero terminated.
else
throw Exception(ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path);
const struct sockaddr *sock_addr = reinterpret_cast <const struct sockaddr *>(&addr);
if (sendto(s, command.data(), command.size(), 0, sock_addr, static_cast <socklen_t>(addrlen)) != static_cast <ssize_t>(command.size()))
throw Exception("Failed to notify systemd.", ErrorCodes::SYSTEM_ERROR);
}
#endif
int Server::main(const std::vector<std::string> & /*args*/)
try
{
@ -748,8 +704,8 @@ try
else
{
const String config_path = config().getString("config-file", "config.xml");
const auto config_dir = std::filesystem::path{config_path}.remove_filename();
setenv("OPENSSL_CONF", config_dir.string() + "openssl.conf", true);
const auto config_dir = std::filesystem::path{config_path}.replace_filename("openssl.conf");
setenv("OPENSSL_CONF", config_dir.string(), true);
}
#endif
@ -761,6 +717,8 @@ try
registerDisks(/* global_skip_access_check= */ false);
registerFormats();
registerRemoteFileMetadatas();
registerSchedulerNodes();
registerResourceManagers();
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision());
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());
@ -1335,6 +1293,11 @@ try
global_context->getDistributedSchedulePool().increaseThreadsCount(new_pool_size);
}
if (config->has("resources"))
{
global_context->getResourceManager()->updateConfiguration(*config);
}
if (!initial_loading)
{
/// We do not load ZooKeeper configuration on the first config loading
@ -1861,6 +1824,9 @@ try
}
#if defined(OS_LINUX)
/// Tell the service manager that service startup is finished.
/// NOTE: the parent clickhouse-watchdog process must do systemdNotify("MAINPID={}\n", child_pid); before
/// the child process notifies 'READY=1'.
systemdNotify("READY=1\n");
#endif


@ -0,0 +1,3 @@
[env]
CFLAGS = "@RUST_CFLAGS@"
CXXFLAGS = "@RUST_CXXFLAGS@"

rust/BLAKE3/CMakeLists.txt Executable file → Normal file

@ -1,4 +1,3 @@
corrosion_import_crate(MANIFEST_PATH Cargo.toml NO_STD)
clickhouse_import_crate(MANIFEST_PATH Cargo.toml)
target_include_directories(_ch_rust_blake3 INTERFACE include)
add_library(ch_rust::blake3 ALIAS _ch_rust_blake3)
add_library(ch_rust::blake3 ALIAS _ch_rust_blake3)


@ -1 +1,43 @@
# NOTE: should be macro to export RUST_CXXFLAGS/RUST_CFLAGS for subfolders
macro(configure_rustc)
# NOTE: this can also be done by overriding rustc, but it is not trivial with rustup.
set(RUST_CFLAGS "${CMAKE_C_FLAGS}")
set(CXX_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/libcxx/include")
set(RUST_CXXFLAGS "${CMAKE_CXX_FLAGS} -isystem ${CXX_INCLUDE_DIR} -nostdinc++")
if (CMAKE_OSX_SYSROOT)
set(RUST_CXXFLAGS "${RUST_CXXFLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
set(RUST_CFLAGS "${RUST_CFLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
elseif(CMAKE_SYSROOT)
set(RUST_CXXFLAGS "${RUST_CXXFLAGS} --sysroot ${CMAKE_SYSROOT}")
set(RUST_CFLAGS "${RUST_CFLAGS} --sysroot ${CMAKE_SYSROOT}")
endif()
message(STATUS "RUST_CFLAGS: ${RUST_CFLAGS}")
message(STATUS "RUST_CXXFLAGS: ${RUST_CXXFLAGS}")
# NOTE: requires RW access for the source dir
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/.cargo/config.toml.in" "${CMAKE_CURRENT_SOURCE_DIR}/.cargo/config.toml" @ONLY)
endmacro()
configure_rustc()
function(clickhouse_import_crate)
# This is a workaround for Corrosion's case-sensitive build type matching in
# _generator_add_cargo_targets(), which leads to different paths in
# IMPORTED_LOCATION and the real path of the library.
#
# It uses CMAKE_CONFIGURATION_TYPES and $<CONFIG>, so here we preserve the
# case of ${CMAKE_BUILD_TYPE} in ${CMAKE_CONFIGURATION_TYPES}.
if ("${CMAKE_BUILD_TYPE_UC}" STREQUAL "DEBUG")
set(CMAKE_CONFIGURATION_TYPES "${CMAKE_BUILD_TYPE};release")
else()
set(CMAKE_CONFIGURATION_TYPES "${CMAKE_BUILD_TYPE};debug")
endif()
# NOTE: we may use LTO for rust too
corrosion_import_crate(NO_STD ${ARGN})
endfunction()
add_subdirectory (BLAKE3)
add_subdirectory (skim)


@ -0,0 +1,2 @@
[env]
CXXFLAGS = "@RUST_CXXFLAGS@"

rust/skim/.gitignore vendored Normal file

@ -0,0 +1,2 @@
build.rs
.cargo/config.toml

rust/skim/CMakeLists.txt Normal file

@ -0,0 +1,56 @@
if (OS_FREEBSD)
# Current nix/libc requires fspacectl, which has been added only since FreeBSD 14.
# And since the sysroot has older libraries, you will get undefined references in the clickhouse binary.
#
# But likely everything should work without this syscall; however, it is not
# possible right now to gently override library versions for dependencies,
# and forking rust modules is a little bit too much for this.
#
# You can take a look at the details in the following issue [1].
#
# [1]: https://github.com/rust-lang/cargo/issues/5640
#
message(STATUS "skim is disabled for FreeBSD")
return()
endif()
clickhouse_import_crate(MANIFEST_PATH Cargo.toml)
# -Wno-dollar-in-identifier-extension: cxx bridge compiles names with '$'
# -Wno-unused-macros: unused CXXBRIDGE1_RUST_STRING
set(CXXBRIDGE_CXXFLAGS "-Wno-dollar-in-identifier-extension -Wno-unused-macros")
set(RUST_CXXFLAGS "${RUST_CXXFLAGS} ${CXXBRIDGE_CXXFLAGS}")
message(STATUS "RUST_CXXFLAGS (for skim): ${RUST_CXXFLAGS}")
# NOTE: requires RW access for the source dir
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build.rs.in" "${CMAKE_CURRENT_SOURCE_DIR}/build.rs" @ONLY)
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/.cargo/config.toml.in" "${CMAKE_CURRENT_SOURCE_DIR}/.cargo/config.toml" @ONLY)
set (ffi_binding_generated_path
${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}/cargo/build/${Rust_CARGO_TARGET_CACHED}/cxxbridge/_ch_rust_skim_rust/src/lib.rs.cc)
set (ffi_binding_final_path ${CMAKE_CURRENT_BINARY_DIR}/skim-ffi.cc)
message(STATUS "Writing FFI Binding for skim: ${ffi_binding_generated_path} => ${ffi_binding_final_path}")
add_custom_command(OUTPUT ${ffi_binding_final_path}
COMMAND ${CMAKE_COMMAND} -E copy ${ffi_binding_generated_path} ${ffi_binding_final_path}
DEPENDS cargo-build__ch_rust_skim_rust)
add_library(_ch_rust_skim_ffi ${ffi_binding_final_path})
if (USE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES)
# static
else()
if (OS_DARWIN)
target_link_libraries(_ch_rust_skim_ffi PRIVATE -Wl,-undefined,dynamic_lookup)
else()
target_link_libraries(_ch_rust_skim_ffi PRIVATE -Wl,--unresolved-symbols=ignore-all)
endif()
endif()
# cxx bridge compiles such bindings
set_target_properties(_ch_rust_skim_ffi PROPERTIES COMPILE_FLAGS "${CXXBRIDGE_CXXFLAGS}")
add_library(_ch_rust_skim INTERFACE)
target_include_directories(_ch_rust_skim INTERFACE include)
target_link_libraries(_ch_rust_skim INTERFACE
_ch_rust_skim_rust
_ch_rust_skim_ffi)
add_library(ch_rust::skim ALIAS _ch_rust_skim)

rust/skim/Cargo.lock generated Normal file

@ -0,0 +1,983 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "_ch_rust_skim_rust"
version = "0.1.0"
dependencies = [
"cxx",
"cxx-build",
"skim",
"term",
]
[[package]]
name = "aho-corasick"
version = "0.7.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
dependencies = [
"memchr",
]
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "arrayvec"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "beef"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bumpalo"
version = "3.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba"
[[package]]
name = "cc"
version = "1.0.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9f73505338f7d905b19d18738976aae232eb46b8efc15554ffc56deb5d9ebe4"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f"
dependencies = [
"iana-time-zone",
"js-sys",
"num-integer",
"num-traits",
"time 0.1.45",
"wasm-bindgen",
"winapi",
]
[[package]]
name = "clap"
version = "3.2.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5"
dependencies = [
"atty",
"bitflags",
"clap_lex",
"indexmap",
"once_cell",
"strsim",
"termcolor",
"textwrap",
]
[[package]]
name = "clap_lex"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
dependencies = [
"os_str_bytes",
]
[[package]]
name = "codespan-reporting"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e"
dependencies = [
"termcolor",
"unicode-width",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
[[package]]
name = "crossbeam"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c"
dependencies = [
"cfg-if",
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-epoch",
"crossbeam-queue",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
dependencies = [
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset 0.7.1",
"scopeguard",
]
[[package]]
name = "crossbeam-queue"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f"
dependencies = [
"cfg-if",
]
[[package]]
name = "cxx"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdf07d07d6531bfcdbe9b8b739b104610c6508dcc4d63b410585faf338241daf"
dependencies = [
"cc",
"cxxbridge-flags",
"cxxbridge-macro",
"link-cplusplus",
]
[[package]]
name = "cxx-build"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2eb5b96ecdc99f72657332953d4d9c50135af1bac34277801cc3937906ebd39"
dependencies = [
"cc",
"codespan-reporting",
"once_cell",
"proc-macro2",
"quote",
"scratch",
"syn",
]
[[package]]
name = "cxxbridge-flags"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac040a39517fd1674e0f32177648334b0f4074625b5588a64519804ba0553b12"
[[package]]
name = "cxxbridge-macro"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1362b0ddcfc4eb0a1f57b68bd77dd99f0e826958a96abd0ae9bd092e114ffed6"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "darling"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0dd3cd20dc6b5a876612a6e5accfe7f3dd883db6d07acfbf14c128f61550dfa"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a784d2ccaf7c98501746bf0be29b2022ba41fd62a2e622af997a03e9f972859f"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn",
]
[[package]]
name = "darling_macro"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7618812407e9402654622dd402b0a89dff9ba93badd6540781526117b92aab7e"
dependencies = [
"darling_core",
"quote",
"syn",
]
[[package]]
name = "defer-drop"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f613ec9fa66a6b28cdb1842b27f9adf24f39f9afc4dcdd9fdecee4aca7945c57"
dependencies = [
"crossbeam-channel",
"once_cell",
]
[[package]]
name = "derive_builder"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d07adf7be193b71cc36b193d0f5fe60b918a3a9db4dad0449f57bcfd519704a3"
dependencies = [
"derive_builder_macro",
]
[[package]]
name = "derive_builder_core"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f91d4cfa921f1c05904dc3c57b4a32c38aed3340cce209f3a6fd1478babafc4"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "derive_builder_macro"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f0314b72bed045f3a68671b3c86328386762c93f82d98c65c3cb5e5f573dd68"
dependencies = [
"derive_builder_core",
"syn",
]
[[package]]
name = "dirs-next"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
dependencies = [
"cfg-if",
"dirs-sys-next",
]
[[package]]
name = "dirs-sys-next"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
dependencies = [
"libc",
"redox_users",
"winapi",
]
[[package]]
name = "either"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797"
[[package]]
name = "env_logger"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7"
dependencies = [
"atty",
"humantime",
"log",
"regex",
"termcolor",
]
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "fuzzy-matcher"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54614a3312934d066701a80f20f15fa3b56d67ac7722b39eea5b4c9dd1d66c94"
dependencies = [
"thread_local",
]
[[package]]
name = "getrandom"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
dependencies = [
"cfg-if",
"libc",
"wasi 0.11.0+wasi-snapshot-preview1",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "iana-time-zone"
version = "0.1.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"winapi",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca"
dependencies = [
"cxx",
"cxx-build",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "indexmap"
version = "1.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]]
name = "js-sys"
version = "0.3.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.138"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8"
[[package]]
name = "link-cplusplus"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9272ab7b96c9046fbc5bc56c06c117cb639fe2d509df0c421cad82d2915cf369"
dependencies = [
"cc",
]
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "memoffset"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
dependencies = [
"autocfg",
]
[[package]]
name = "memoffset"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
dependencies = [
"autocfg",
]
[[package]]
name = "nix"
version = "0.24.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069"
dependencies = [
"bitflags",
"cfg-if",
"libc",
]
[[package]]
name = "nix"
version = "0.25.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4"
dependencies = [
"autocfg",
"bitflags",
"cfg-if",
"libc",
"memoffset 0.6.5",
"pin-utils",
]
[[package]]
name = "num-integer"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "num_cpus"
version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6058e64324c71e02bc2b150e4f3bc8286db6c83092132ffa3f6b1eab0f9def5"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "once_cell"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860"
[[package]]
name = "os_str_bytes"
version = "6.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "proc-macro2"
version = "1.0.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rayon"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cac410af5d00ab6884528b4ab69d1e8e146e8d471201800fa1b4524126de6ad3"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"num_cpus",
]
[[package]]
name = "redox_syscall"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [
"bitflags",
]
[[package]]
name = "redox_users"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b"
dependencies = [
"getrandom",
"redox_syscall",
"thiserror",
]
[[package]]
name = "regex"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
[[package]]
name = "rustversion"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97477e48b4cf8603ad5f7aaf897467cf42ab4218a38ef76fb14c2d6773a6d6a8"
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "scratch"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8132065adcfd6e02db789d9285a0deb2f3fcb04002865ab67d5fb103533898"
[[package]]
name = "serde"
version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "256b9932320c590e707b94576e3cc1f7c9024d0ee6612dfbcf1cb106cbe8e055"
[[package]]
name = "shlex"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
[[package]]
name = "skim"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cebed5f897cd6c0d80fbe30adb36c0abf7400e93043a63ae56458495642b3485"
dependencies = [
"atty",
"beef",
"bitflags",
"chrono",
"clap",
"crossbeam",
"defer-drop",
"derive_builder",
"env_logger",
"fuzzy-matcher",
"lazy_static",
"log",
"nix 0.25.1",
"rayon",
"regex",
"shlex",
"time 0.3.17",
"timer",
"tuikit",
"unicode-width",
"vte",
]
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "1.0.105"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b9b43d45702de4c839cb9b51d9f529c5dd26a4aff255b42b1ebc03e88ee908"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "term"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f"
dependencies = [
"dirs-next",
"rustversion",
"winapi",
]
[[package]]
name = "termcolor"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
dependencies = [
"winapi-util",
]
[[package]]
name = "textwrap"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
[[package]]
name = "thiserror"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "thread_local"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180"
dependencies = [
"once_cell",
]
[[package]]
name = "time"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a"
dependencies = [
"libc",
"wasi 0.10.0+wasi-snapshot-preview1",
"winapi",
]
[[package]]
name = "time"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376"
dependencies = [
"serde",
"time-core",
]
[[package]]
name = "time-core"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd"
[[package]]
name = "timer"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31d42176308937165701f50638db1c31586f183f1aab416268216577aec7306b"
dependencies = [
"chrono",
]
[[package]]
name = "tuikit"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e19c6ab038babee3d50c8c12ff8b910bdb2196f62278776422f50390d8e53d8"
dependencies = [
"bitflags",
"lazy_static",
"log",
"nix 0.24.3",
"term",
"unicode-width",
]
[[package]]
name = "unicode-ident"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3"
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "utf8parse"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "936e4b492acfd135421d8dca4b1aa80a7bfc26e702ef3af710e0752684df5372"
[[package]]
name = "vte"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aae21c12ad2ec2d168c236f369c38ff332bc1134f7246350dca641437365045"
dependencies = [
"arrayvec",
"utf8parse",
"vte_generate_state_changes",
]
[[package]]
name = "vte_generate_state_changes"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d257817081c7dffcdbab24b9e62d2def62e2ff7d00b1c20062551e6cccc145ff"
dependencies = [
"proc-macro2",
"quote",
]
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

rust/skim/Cargo.toml Normal file
View File

@ -0,0 +1,20 @@
[package]
name = "_ch_rust_skim_rust"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
skim = "0.10.2"
cxx = "1.0.83"
term = "0.7.0"
[build-dependencies]
cxx-build = "1.0.83"
[lib]
crate-type = ["staticlib"]
[profile.release]
debug = true

rust/skim/build.rs.in Normal file
View File

@ -0,0 +1,8 @@
fn main() {
let mut build = cxx_build::bridge("src/lib.rs");
for flag in "@RUST_CXXFLAGS@".split(' ') {
build.flag(flag);
}
build.compile("skim");
println!("cargo:rerun-if-changed=src/lib.rs");
}

rust/skim/include/skim.h Normal file
View File

@ -0,0 +1,90 @@
/// This header was compiled with:
///
/// $ cxxbridge rust/skim/src/lib.rs --header
///
/// For more info [1].
///
/// [1]: https://cxx.rs/build/other.html
#pragma once
#include <array>
#include <cstdint>
#include <string>
#include <vector>
namespace rust {
inline namespace cxxbridge1 {
// #include "rust/cxx.h"
struct unsafe_bitcopy_t;
#ifndef CXXBRIDGE1_RUST_STRING
#define CXXBRIDGE1_RUST_STRING
class String final {
public:
String() noexcept;
String(const String &) noexcept;
String(String &&) noexcept;
~String() noexcept;
String(const std::string &);
String(const char *);
String(const char *, std::size_t);
String(const char16_t *);
String(const char16_t *, std::size_t);
static String lossy(const std::string &) noexcept;
static String lossy(const char *) noexcept;
static String lossy(const char *, std::size_t) noexcept;
static String lossy(const char16_t *) noexcept;
static String lossy(const char16_t *, std::size_t) noexcept;
String &operator=(const String &) &noexcept;
String &operator=(String &&) &noexcept;
explicit operator std::string() const;
const char *data() const noexcept;
std::size_t size() const noexcept;
std::size_t length() const noexcept;
bool empty() const noexcept;
const char *c_str() noexcept;
std::size_t capacity() const noexcept;
void reserve(size_t new_cap) noexcept;
using iterator = char *;
iterator begin() noexcept;
iterator end() noexcept;
using const_iterator = const char *;
const_iterator begin() const noexcept;
const_iterator end() const noexcept;
const_iterator cbegin() const noexcept;
const_iterator cend() const noexcept;
bool operator==(const String &) const noexcept;
bool operator!=(const String &) const noexcept;
bool operator<(const String &) const noexcept;
bool operator<=(const String &) const noexcept;
bool operator>(const String &) const noexcept;
bool operator>=(const String &) const noexcept;
void swap(String &) noexcept;
String(unsafe_bitcopy_t, const String &) noexcept;
private:
struct lossy_t;
String(lossy_t, const char *, std::size_t) noexcept;
String(lossy_t, const char16_t *, std::size_t) noexcept;
friend void swap(String &lhs, String &rhs) noexcept { lhs.swap(rhs); }
std::array<std::uintptr_t, 3> repr;
};
#endif // CXXBRIDGE1_RUST_STRING
} // namespace cxxbridge1
} // namespace rust
::rust::String skim(::std::vector<::std::string> const &words);
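The generated header exposes a single C++ entry point: skim() takes the candidate strings and returns the selection as a rust::String, with a Rust Err surfacing as a C++ exception. A minimal sketch of a caller (the include path and the program around it are illustrative, not part of this commit):

#include <iostream>
#include <string>
#include <vector>
#include "rust/skim/include/skim.h"   // path assumed; adjust to the include dirs of the build

int main()
{
    std::vector<std::string> words = {"first", "second", "third"};
    try
    {
        /// Blocks until the user picks an item in the fuzzy finder (or aborts).
        ::rust::String chosen = skim(words);
        std::cout << "selected: " << std::string(chosen) << "\n";
    }
    catch (const std::exception & e)
    {
        /// cxx maps the Rust Err(String) into an exception derived from std::exception.
        std::cerr << "skim failed: " << e.what() << "\n";
    }
    return 0;
}

Because cxx turns Result<String> into an exception, the catch block above also covers the "no terminal" and "nothing selected" errors returned from the Rust side.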

rust/skim/src/lib.rs Normal file
View File

@ -0,0 +1,54 @@
use skim::prelude::*;
use term::terminfo::TermInfo;
use cxx::{CxxString, CxxVector};
#[cxx::bridge]
mod ffi {
extern "Rust" {
fn skim(words: &CxxVector<CxxString>) -> Result<String>;
}
}
struct Item {
text: String,
}
impl SkimItem for Item {
fn text(&self) -> Cow<str> {
return Cow::Borrowed(&self.text);
}
}
fn skim(words: &CxxVector<CxxString>) -> Result<String, String> {
// Check that a terminal is available, to avoid a panic.
if let Err(err) = TermInfo::from_env() {
return Err(format!("{}", err));
}
let options = SkimOptionsBuilder::default()
.height(Some("30%"))
.tac(true)
.tiebreak(Some("-score".to_string()))
.build()
.unwrap();
let (tx, rx): (SkimItemSender, SkimItemReceiver) = unbounded();
for word in words {
tx.send(Arc::new(Item{ text: word.to_string() })).unwrap();
}
// Drop the sender so that skim knows when to stop waiting for more items.
drop(tx);
let output = Skim::run_with(&options, Some(rx));
if output.is_none() {
return Err("skim return nothing".to_string());
}
let output = output.unwrap();
if output.is_abort {
return Ok("".to_string());
}
if output.selected_items.is_empty() {
return Err("No items had been selected".to_string());
}
return Ok(output.selected_items[0].output().to_string());
}

View File

@ -86,6 +86,49 @@ void SettingsConstraints::merge(const SettingsConstraints & other)
}
void SettingsConstraints::check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const
{
for (const auto & element : profile_elements)
{
if (SettingsProfileElements::isAllowBackupSetting(element.setting_name))
continue;
if (!element.value.isNull())
{
SettingChange value(element.setting_name, element.value);
check(current_settings, value);
}
if (!element.min_value.isNull())
{
SettingChange value(element.setting_name, element.min_value);
check(current_settings, value);
}
if (!element.max_value.isNull())
{
SettingChange value(element.setting_name, element.max_value);
check(current_settings, value);
}
SettingConstraintWritability new_value = SettingConstraintWritability::WRITABLE;
SettingConstraintWritability old_value = SettingConstraintWritability::WRITABLE;
if (element.writability)
new_value = *element.writability;
auto it = constraints.find(element.setting_name);
if (it != constraints.end())
old_value = it->second.writability;
if (new_value != old_value)
{
if (old_value == SettingConstraintWritability::CONST)
throw Exception("Setting " + element.setting_name + " should not be changed", ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
}
}
}
void SettingsConstraints::check(const Settings & current_settings, const SettingChange & change) const
{
checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION);
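Besides validating each element's value, min and max as ordinary setting changes, the new overload above rejects a profile that tries to change the writability of a setting whose current constraint is CONST. A condensed sketch of that last rule with simplified types (this is not the ClickHouse API, just the shape of the check):

#include <stdexcept>
#include <string>

enum class Writability { WRITABLE, CONST };

/// A profile element may propose a writability; if the existing constraint pins
/// the setting as CONST, any different proposal is rejected.
void checkWritabilityTransition(const std::string & name, Writability existing, Writability proposed)
{
    if (proposed != existing && existing == Writability::CONST)
        throw std::runtime_error("Setting " + name + " should not be changed");
}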

View File

@ -73,6 +73,7 @@ public:
void merge(const SettingsConstraints & other);
/// Checks whether `change` violates these constraints and throws an exception if so.
void check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const;
void check(const Settings & current_settings, const SettingChange & change) const;
void check(const Settings & current_settings, const SettingsChanges & changes) const;
void check(const Settings & current_settings, SettingsChanges & changes) const;

View File

@ -75,6 +75,10 @@ void SettingsProfileElement::init(const ASTSettingsProfileElement & ast, const A
}
}
bool SettingsProfileElement::isConstraint() const
{
return this->writability || !this->min_value.isNull() || !this->max_value.isNull();
}
std::shared_ptr<ASTSettingsProfileElement> SettingsProfileElement::toAST() const
{
@ -213,7 +217,7 @@ SettingsConstraints SettingsProfileElements::toSettingsConstraints(const AccessC
{
SettingsConstraints res{access_control};
for (const auto & elem : *this)
if (!elem.setting_name.empty() && elem.setting_name != ALLOW_BACKUP_SETTING_NAME)
if (!elem.setting_name.empty() && elem.isConstraint() && elem.setting_name != ALLOW_BACKUP_SETTING_NAME)
res.set(
elem.setting_name,
elem.min_value,
@ -248,4 +252,9 @@ bool SettingsProfileElements::isBackupAllowed() const
return true;
}
bool SettingsProfileElements::isAllowBackupSetting(const String & setting_name)
{
return setting_name == ALLOW_BACKUP_SETTING_NAME;
}
}

View File

@ -44,6 +44,8 @@ struct SettingsProfileElement
std::shared_ptr<ASTSettingsProfileElement> toAST() const;
std::shared_ptr<ASTSettingsProfileElement> toASTWithNames(const AccessControl & access_control) const;
bool isConstraint() const;
private:
void init(const ASTSettingsProfileElement & ast, const AccessControl * access_control);
};
@ -71,6 +73,8 @@ public:
std::vector<UUID> toProfileIDs() const;
bool isBackupAllowed() const;
static bool isAllowBackupSetting(const String & setting_name);
};
}

View File

@ -129,7 +129,7 @@ BackupWriterS3::BackupWriterS3(
, request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)
, log(&Poco::Logger::get("BackupWriterS3"))
{
request_settings.updateFromSettingsIfEmpty(context_->getSettingsRef());
request_settings.updateFromSettings(context_->getSettingsRef());
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
}
@ -210,20 +210,21 @@ void BackupWriterS3::copyObjectMultipartImpl(
std::vector<String> part_tags;
size_t position = 0;
size_t upload_part_size = request_settings.min_upload_part_size;
const auto & settings = request_settings.getUploadSettings();
size_t upload_part_size = settings.min_upload_part_size;
for (size_t part_number = 1; position < size; ++part_number)
{
/// Check that part number is not too big.
if (part_number > request_settings.max_part_number)
if (part_number > settings.max_part_number)
{
throw Exception(
ErrorCodes::INVALID_CONFIG_PARAMETER,
"Part number exceeded {} while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, "
"upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_operation_copy_size = {}",
request_settings.max_part_number, size, request_settings.min_upload_part_size, request_settings.max_upload_part_size,
request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold,
request_settings.max_single_operation_copy_size);
settings.max_part_number, size, settings.min_upload_part_size, settings.max_upload_part_size,
settings.upload_part_size_multiply_factor, settings.upload_part_size_multiply_parts_count_threshold,
settings.max_single_operation_copy_size);
}
size_t next_position = std::min(position + upload_part_size, size);
@ -256,10 +257,10 @@ void BackupWriterS3::copyObjectMultipartImpl(
position = next_position;
/// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less or equal than `max_part_number`).
if (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0)
if (part_number % settings.upload_part_size_multiply_parts_count_threshold == 0)
{
upload_part_size *= request_settings.upload_part_size_multiply_factor;
upload_part_size = std::min(upload_part_size, request_settings.max_upload_part_size);
upload_part_size *= settings.upload_part_size_multiply_factor;
upload_part_size = std::min(upload_part_size, settings.max_upload_part_size);
}
}
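The loop above keeps very large uploads within max_part_number parts by growing upload_part_size geometrically: after every upload_part_size_multiply_parts_count_threshold parts it is multiplied by upload_part_size_multiply_factor, capped at max_upload_part_size. A standalone sketch of that schedule (the helper and the example limits are made up for illustration):

#include <algorithm>
#include <cstdint>
#include <iostream>

/// How many parts an object of `size` bytes is split into under the adaptive schedule,
/// with all limits passed in explicitly.
uint64_t countParts(uint64_t size, uint64_t min_part, uint64_t max_part,
                    uint64_t multiply_factor, uint64_t parts_threshold)
{
    uint64_t part_size = min_part;
    uint64_t parts = 0;
    for (uint64_t position = 0; position < size;)
    {
        position = std::min(position + part_size, size);
        ++parts;
        if (parts % parts_threshold == 0)
            part_size = std::min(part_size * multiply_factor, max_part);
    }
    return parts;
}

int main()
{
    /// 1 TiB starting at 16 MiB parts, doubling every 500 parts, capped at 5 GiB per part.
    std::cout << countParts(1ULL << 40, 16ULL << 20, 5ULL << 30, 2, 500) << " parts\n";
}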
@ -302,7 +303,7 @@ void BackupWriterS3::copyFileNative(DiskPtr from_disk, const String & file_name_
auto file_path = fs::path(s3_uri.key) / file_name_to;
auto head = S3::headObject(*client, source_bucket, objects[0].absolute_path).GetResult();
if (static_cast<size_t>(head.GetContentLength()) < request_settings.max_single_operation_copy_size)
if (static_cast<size_t>(head.GetContentLength()) < request_settings.getUploadSettings().max_single_operation_copy_size)
{
copyObjectImpl(
source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, head);

View File

@ -88,6 +88,7 @@ add_headers_and_sources(clickhouse_common_io Common)
add_headers_and_sources(clickhouse_common_io Common/HashTable)
add_headers_and_sources(clickhouse_common_io IO)
add_headers_and_sources(clickhouse_common_io IO/Archives)
add_headers_and_sources(clickhouse_common_io IO/Resource)
add_headers_and_sources(clickhouse_common_io IO/S3)
list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp)

View File

@ -640,6 +640,9 @@
M(669, NAMED_COLLECTION_DOESNT_EXIST) \
M(670, NAMED_COLLECTION_ALREADY_EXISTS) \
M(671, NAMED_COLLECTION_IS_IMMUTABLE) \
M(672, INVALID_SCHEDULER_NODE) \
M(673, RESOURCE_ACCESS_DENIED) \
M(674, RESOURCE_NOT_FOUND) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -389,6 +389,11 @@ public:
zeroValue()->~Cell();
}
void clearHasZeroFlag()
{
has_zero = false;
}
Cell * zeroValue() { return std::launder(reinterpret_cast<Cell*>(&zero_value_storage)); }
const Cell * zeroValue() const { return std::launder(reinterpret_cast<const Cell*>(&zero_value_storage)); }
};
@ -399,6 +404,7 @@ struct ZeroValueStorage<false, Cell>
bool hasZero() const { return false; }
void setHasZero() { throw DB::Exception("HashTable: logical error", DB::ErrorCodes::LOGICAL_ERROR); }
void clearHasZero() {}
void clearHasZeroFlag() {}
Cell * zeroValue() { return nullptr; }
const Cell * zeroValue() const { return nullptr; }
@ -652,6 +658,17 @@ protected:
/// [1]: https://github.com/google/sanitizers/issues/854#issuecomment-329661378
__msan_unpoison(it.ptr, sizeof(*it.ptr));
}
/// Everything has been destroyed in the loop above; reset the flag
/// only, without calling the destructor.
this->clearHasZeroFlag();
}
else
{
/// NOTE: it is OK to call the dtor for a trivially destructible type
/// even if the object has not been initialized, so there is no need
/// for a hasZero() check.
this->clearHasZero();
}
}
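The branch above matters because for a non-trivially-destructible cell the destructor has already run inside the loop, so only the flag may be reset; invoking the destructor a second time would be undefined behaviour, while for trivially destructible cells clearHasZero() remains safe. A self-contained sketch of the same manual-lifetime pattern (plain standard C++, not the hash table code):

#include <memory>
#include <new>
#include <string>

/// One optional slot kept in raw memory, mimicking the "zero value" cell:
/// the element is constructed and destroyed manually, and a flag remembers
/// whether it currently holds a value.
struct ZeroSlot
{
    alignas(std::string) unsigned char storage[sizeof(std::string)];
    bool has_value = false;

    void emplace(const std::string & s)
    {
        new (storage) std::string(s);
        has_value = true;
    }

    void destroyElements()
    {
        if (has_value)
            std::destroy_at(std::launder(reinterpret_cast<std::string *>(storage)));
        /// The element is gone now; only the flag may be cleared
        /// (the analogue of clearHasZeroFlag()), never the destructor run again.
        has_value = false;
    }
};

int main()
{
    ZeroSlot slot;
    slot.emplace("zero value");
    slot.destroyElements();   /// destructor runs exactly once
    slot.destroyElements();   /// safe: flag already false, nothing to destroy
}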
@ -1284,7 +1301,6 @@ public:
Cell::State::read(rb);
destroyElements();
this->clearHasZero();
m_size = 0;
size_t new_size = 0;
@ -1308,7 +1324,6 @@ public:
Cell::State::readText(rb);
destroyElements();
this->clearHasZero();
m_size = 0;
size_t new_size = 0;
@ -1342,7 +1357,6 @@ public:
void clear()
{
destroyElements();
this->clearHasZero();
m_size = 0;
memset(static_cast<void*>(buf), 0, grower.bufSize() * sizeof(*buf));
@ -1353,7 +1367,6 @@ public:
void clearAndShrink()
{
destroyElements();
this->clearHasZero();
m_size = 0;
free();
}

View File

@ -52,4 +52,5 @@
#cmakedefine01 USE_ODBC
#cmakedefine01 USE_BORINGSSL
#cmakedefine01 USE_BLAKE3
#cmakedefine01 USE_SKIM
#cmakedefine01 USE_OPENSSL_INTREE

View File

@ -136,7 +136,6 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa
return;
S3Settings::RequestSettings request_settings_1;
request_settings_1.setEmptyFieldsByDefault();
const auto create_writer = [&](const auto & key)
{

View File

@ -77,6 +77,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(UInt64, distributed_connections_pool_size, 1024, "Maximum number of connections with one remote server in the pool.", 0) \
M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \
M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \
M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3 s3_min_upload_part_size multiplied by s3_upload_part_size_multiply_factor.", 0) \
M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
@ -645,6 +646,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Map, additional_table_filters, "", "Additional filter expression which would be applied after reading from specified table. Syntax: {'table1': 'expression', 'database.table2': 'expression'}", 0) \
M(String, additional_result_filter, "", "Additional filter expression which would be applied to query result", 0) \
\
M(String, workload, "default", "Name of workload to be used to access resources", 0) \
\
/** Experimental functions */ \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \

View File

@ -78,41 +78,43 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"22.12", {{"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}},
{"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}},
{"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}},
{"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"},
{"enable_positional_arguments", false, true, "Enable positional arguments feature by default"},
{"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}},
{"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"},
{"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}},
{"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"},
{"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}},
{"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}},
{"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}},
{"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}},
{"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"},
{"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}},
{"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}},
{"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}},
{"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"},
{"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"},
{"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}},
{"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}},
{"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"},
{"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"},
{"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"},
{"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}},
{"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}},
{"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}},
{"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"},
{"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}},
{"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}},
{"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}},
{"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}},
{"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}},
{"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}},
{"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}},
{"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"},
{"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"},
{"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}},
{"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}},
{"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}},
{"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"},
{"enable_positional_arguments", false, true, "Enable positional arguments feature by default"},
{"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}},
{"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"},
{"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}},
{"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"},
{"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}},
{"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}},
{"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}},
{"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}},
{"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"},
{"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}},
{"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}},
{"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}},
{"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"},
{"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"},
{"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}},
{"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}},
{"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"},
{"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"},
{"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"},
{"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}},
{"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}},
{"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}},
{"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"},
{"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}},
{"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}},
{"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}},
{"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}},
{"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}},
{"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}},
{"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}},
};
}
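The map above backs the compatibility setting: when a user requests the behaviour of version V, defaults changed in releases newer than V are rolled back to their recorded previous values. A rough sketch of that lookup with heavily simplified types (version keys as pairs, values as strings; none of this is the real ClickHouse implementation):

#include <map>
#include <string>
#include <utility>
#include <vector>

struct Change
{
    std::string name;
    std::string previous_value;  /// default before the release
    std::string new_value;       /// default starting with the release
};

using Version = std::pair<int, int>;                    /// e.g. {22, 12}
using History = std::map<Version, std::vector<Change>>; /// ordered oldest -> newest

/// Collect the defaults that must be overridden to emulate `target`.
std::map<std::string, std::string> compatibilityOverrides(const History & history, Version target)
{
    std::map<std::string, std::string> overrides;
    for (const auto & [version, changes] : history)
        if (version > target)                    /// change happened after the requested version
            for (const auto & change : changes)
                overrides.emplace(change.name, change.previous_value); /// keep the earliest previous value
    return overrides;
}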

View File

@ -30,6 +30,7 @@
#include <Poco/Util/Application.h>
#include <Poco/Exception.h>
#include <Poco/ErrorHandler.h>
#include <Poco/Pipe.h>
#include <Common/ErrorHandlers.h>
#include <base/argsToConfig.h>
@ -76,6 +77,7 @@ namespace DB
{
extern const int CANNOT_SET_SIGNAL_HANDLER;
extern const int CANNOT_SEND_SIGNAL;
extern const int SYSTEM_ERROR;
}
}
@ -1007,11 +1009,15 @@ void BaseDaemon::setupWatchdog()
while (true)
{
/// This pipe is used to synchronize notifications to the service manager from the child process
/// to be sent after the notifications from the parent process.
Poco::Pipe notify_sync;
static pid_t pid = -1;
pid = fork();
if (-1 == pid)
throw Poco::Exception("Cannot fork");
DB::throwFromErrno("Cannot fork", DB::ErrorCodes::SYSTEM_ERROR);
if (0 == pid)
{
@ -1019,10 +1025,36 @@ void BaseDaemon::setupWatchdog()
#if defined(OS_LINUX)
if (0 != prctl(PR_SET_PDEATHSIG, SIGKILL))
logger().warning("Cannot do prctl to ask termination with parent.");
if (getppid() == 1)
throw Poco::Exception("Parent watchdog process has exited.");
#endif
{
notify_sync.close(Poco::Pipe::CLOSE_WRITE);
/// Read from the pipe will block until the pipe is closed.
/// This way we synchronize with the parent process.
char buf[1];
if (0 != notify_sync.readBytes(buf, sizeof(buf)))
throw Poco::Exception("Unexpected result while waiting for watchdog synchronization pipe to close.");
}
return;
}
#if defined(OS_LINUX)
/// Tell the service manager the actual main process is not this one but the forked process
/// because it is going to be serving the requests and it is going to send "READY=1" notification
/// when it is fully started.
/// NOTE: we do this right after fork() and then notify the child process to "unblock" so that it finishes initialization
/// and sends "READY=1" after we have sent "MAINPID=..."
systemdNotify(fmt::format("MAINPID={}\n", pid));
#endif
/// Close the pipe after notifying the service manager.
/// The child process is waiting for the pipe to be closed.
notify_sync.close();
/// Change short thread name and process name.
setThreadName("clckhouse-watch"); /// 15 characters
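The pipe introduced above enforces an ordering guarantee: the service manager must receive MAINPID=<child> from the watchdog before the child sends READY=1, so the child blocks on a read of the pipe and the parent closes its end only after the MAINPID notification went out. A stripped-down sketch of that handshake with plain POSIX calls (error handling trimmed; this is not the daemon code):

#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cstdio>

int main()
{
    int fds[2];
    if (pipe(fds) != 0)
        return 1;

    pid_t pid = fork();
    if (pid == 0)
    {
        /// Child: wait until the parent closes the write end, then proceed.
        close(fds[1]);
        char buf;
        ssize_t n = read(fds[0], &buf, 1);  /// returns 0 (EOF) once the parent closes the pipe
        (void)n;
        std::printf("child: parent has announced MAINPID, now sending READY=1\n");
        close(fds[0]);
        return 0;
    }

    /// Parent (watchdog): announce the child first, then unblock it.
    close(fds[0]);
    std::printf("watchdog: MAINPID=%d\n", static_cast<int>(pid));
    close(fds[1]);                          /// EOF on the child's read -> child continues
    waitpid(pid, nullptr, 0);
    return 0;
}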
@ -1141,3 +1173,58 @@ String BaseDaemon::getStoredBinaryHash() const
{
return stored_binary_hash;
}
#if defined(OS_LINUX)
void systemdNotify(const std::string_view & command)
{
const char * path = getenv("NOTIFY_SOCKET"); // NOLINT(concurrency-mt-unsafe)
if (path == nullptr)
return; /// not using systemd
int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
if (s == -1)
DB::throwFromErrno("Can't create UNIX socket for systemd notify.", DB::ErrorCodes::SYSTEM_ERROR);
SCOPE_EXIT({ close(s); });
const size_t len = strlen(path);
struct sockaddr_un addr;
addr.sun_family = AF_UNIX;
if (len < 2 || len > sizeof(addr.sun_path) - 1)
throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path);
memcpy(addr.sun_path, path, len + 1); /// write last zero as well.
size_t addrlen = offsetof(struct sockaddr_un, sun_path) + len;
/// '@' means this is Linux abstract socket, per documentation sun_path[0] must be set to '\0' for it.
if (path[0] == '@')
addr.sun_path[0] = 0;
else if (path[0] == '/')
addrlen += 1; /// non-abstract-addresses should be zero terminated.
else
throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path);
const struct sockaddr *sock_addr = reinterpret_cast <const struct sockaddr *>(&addr);
size_t sent_bytes_total = 0;
while (sent_bytes_total < command.size())
{
auto sent_bytes = sendto(s, command.data() + sent_bytes_total, command.size() - sent_bytes_total, 0, sock_addr, static_cast<socklen_t>(addrlen));
if (sent_bytes == -1)
{
if (errno == EINTR)
continue;
else
DB::throwFromErrno("Failed to notify systemd, sendto returned error.", DB::ErrorCodes::SYSTEM_ERROR);
}
else
sent_bytes_total += sent_bytes;
}
}
#endif

View File

@ -197,3 +197,9 @@ std::optional<std::reference_wrapper<Daemon>> BaseDaemon::tryGetInstance()
else
return {};
}
#if defined(OS_LINUX)
/// Sends notification (e.g. "server is ready") to systemd, analogous to sd_notify from libsystemd.
/// See https://www.freedesktop.org/software/systemd/man/sd_notify.html for more information on the supported notifications.
void systemdNotify(const std::string_view & command);
#endif
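With this declaration, startup code can report readiness the same way sd_notify would, and the call degrades to a no-op when NOTIFY_SOCKET is not set. A hypothetical call site (the surrounding program is made up; OS_LINUX is the macro used throughout the codebase, so on other platforms the call simply compiles out):

#include <string_view>

#if defined(OS_LINUX)
void systemdNotify(const std::string_view & command);  /// declared in the daemon header above
#endif

int main()
{
#if defined(OS_LINUX)
    /// Report readiness once initialization is done; a no-op when NOTIFY_SOCKET is unset.
    systemdNotify("READY=1\n");
#endif
    return 0;
}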

View File

@ -512,7 +512,7 @@ void S3ObjectStorage::copyObjectMultipartImpl(
std::vector<String> part_tags;
size_t upload_part_size = settings_ptr->request_settings.min_upload_part_size;
size_t upload_part_size = settings_ptr->request_settings.getUploadSettings().min_upload_part_size;
for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size)
{
ProfileEvents::increment(ProfileEvents::S3UploadPartCopy);

View File

@ -34,24 +34,7 @@ namespace ErrorCodes
std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context)
{
const Settings & settings = context->getSettingsRef();
S3Settings::RequestSettings request_settings;
request_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", settings.s3_max_single_read_retries);
request_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", settings.s3_min_upload_part_size);
request_settings.max_upload_part_size = config.getUInt64(config_prefix + ".s3_max_upload_part_size", S3Settings::RequestSettings::DEFAULT_MAX_UPLOAD_PART_SIZE);
request_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", settings.s3_upload_part_size_multiply_factor);
request_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", settings.s3_upload_part_size_multiply_parts_count_threshold);
request_settings.max_part_number = config.getUInt64(config_prefix + ".s3_max_part_number", S3Settings::RequestSettings::DEFAULT_MAX_PART_NUMBER);
request_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", settings.s3_max_single_part_upload_size);
request_settings.check_objects_after_upload = config.getUInt64(config_prefix + ".s3_check_objects_after_upload", settings.s3_check_objects_after_upload);
request_settings.max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".s3_max_unexpected_write_error_retries", settings.s3_max_unexpected_write_error_retries);
// NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used.
if (UInt64 max_get_rps = config.getUInt64(config_prefix + ".s3_max_get_rps", settings.s3_max_get_rps))
request_settings.get_request_throttler = std::make_shared<Throttler>(
max_get_rps, config.getUInt64(config_prefix + ".s3_max_get_burst", settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * max_get_rps));
if (UInt64 max_put_rps = config.getUInt64(config_prefix + ".s3_max_put_rps", settings.s3_max_put_rps))
request_settings.put_request_throttler = std::make_shared<Throttler>(
max_put_rps, config.getUInt64(config_prefix + ".s3_max_put_burst", settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * max_put_rps));
S3Settings::RequestSettings request_settings(config, config_prefix, settings, "s3_");
return std::make_unique<S3ObjectStorageSettings>(
request_settings,

View File

@ -668,7 +668,7 @@ struct ImplBLAKE3
{
auto err_st = std::string(err_msg);
blake3_free_char_pointer(err_msg);
throw Exception("Function returned error message: " + std::string(err_msg), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception("Function returned error message: " + err_st, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}
#endif
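The one-line change above fixes a use-after-free: the error text has to be copied into err_st before blake3_free_char_pointer releases the C buffer, and only the copy may be used afterwards. The general copy-before-free pattern, with a made-up C-style API standing in for the real one:

#include <cstdlib>
#include <cstring>
#include <stdexcept>
#include <string>

/// Stand-in for a C API that returns a malloc'ed error message (hypothetical).
char * c_api_error_message()
{
    const char * text = "something went wrong";
    char * buf = static_cast<char *>(std::malloc(std::strlen(text) + 1));
    std::strcpy(buf, text);
    return buf;
}

void checkedCall()
{
    char * err_msg = c_api_error_message();
    if (err_msg != nullptr)
    {
        std::string err(err_msg);  /// copy first ...
        std::free(err_msg);        /// ... then release the C buffer
        throw std::runtime_error("Function returned error message: " + err);  /// use only the copy
    }
}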

View File

@ -9,6 +9,7 @@
#include <Interpreters/castColumn.h>
#include <Interpreters/Context.h>
#include <numeric>
#include <vector>
namespace DB
@ -56,7 +57,7 @@ private:
for (const auto & arg : arguments)
{
if (!isUnsignedInteger(arg))
if (!isInteger(arg))
throw Exception{"Illegal type " + arg->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
@ -72,8 +73,12 @@ private:
{
const auto & in_data = in->getData();
const auto total_values = std::accumulate(std::begin(in_data), std::end(in_data), size_t{},
[this] (const size_t lhs, const size_t rhs)
[this] (const size_t lhs, const T rhs)
{
if (rhs < 0)
throw Exception{"A call to function " + getName() + " overflows, only support positive values when only end is provided",
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
const auto sum = lhs + rhs;
if (sum < lhs)
throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
@ -96,7 +101,7 @@ private:
IColumn::Offset offset{};
for (size_t row_idx = 0, rows = in->size(); row_idx < rows; ++row_idx)
{
for (size_t elem_idx = 0, elems = in_data[row_idx]; elem_idx < elems; ++elem_idx)
for (T elem_idx = 0, elems = in_data[row_idx]; elem_idx < elems; ++elem_idx)
out_data[offset + elem_idx] = static_cast<T>(elem_idx);
offset += in_data[row_idx];
@ -121,15 +126,20 @@ private:
size_t total_values = 0;
size_t pre_values = 0;
std::vector<size_t> row_length(input_rows_count);
for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
{
if (start < end_data[row_idx] && step == 0)
if (step == 0)
throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero",
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
pre_values += start >= end_data[row_idx] ? 0
: (end_data[row_idx] - start - 1) / step + 1;
if (start < end_data[row_idx] && step > 0)
row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) - 1) / static_cast<__int128_t>(step) + 1;
else if (start > end_data[row_idx] && step < 0)
row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) + 1) / static_cast<__int128_t>(step) + 1;
pre_values += row_length[row_idx];
if (pre_values < total_values)
throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
@ -151,15 +161,8 @@ private:
IColumn::Offset offset{};
for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
{
for (size_t st = start, ed = end_data[row_idx]; st < ed; st += step)
{
out_data[offset++] = static_cast<T>(st);
if (st > st + step)
throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
}
for (size_t idx = 0; idx < row_length[row_idx]; idx++)
out_data[offset++] = static_cast<T>(start + offset * step);
out_offsets[row_idx] = offset;
}
@ -180,19 +183,25 @@ private:
size_t total_values = 0;
size_t pre_values = 0;
std::vector<size_t> row_length(input_rows_count);
for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
{
if (start_data[row_idx] < end_data[row_idx] && step == 0)
if (step == 0)
throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero",
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
pre_values += start_data[row_idx] >= end_data[row_idx] ? 0
: (end_data[row_idx] - start_data[row_idx] - 1) / step + 1;
if (start_data[row_idx] < end_data[row_idx] && step > 0)
row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) - 1) / static_cast<__int128_t>(step) + 1;
else if (start_data[row_idx] > end_data[row_idx] && step < 0)
row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) + 1) / static_cast<__int128_t>(step) + 1;
pre_values += row_length[row_idx];
if (pre_values < total_values)
throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
total_values = pre_values;
if (total_values > max_elements)
@ -210,15 +219,8 @@ private:
IColumn::Offset offset{};
for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
{
for (size_t st = start_data[row_idx], ed = end_data[row_idx]; st < ed; st += step)
{
out_data[offset++] = static_cast<T>(st);
if (st > st + step)
throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
}
for (size_t idx = 0; idx < row_length[row_idx]; idx++)
out_data[offset++] = static_cast<T>(start_data[row_idx] + idx * step);
out_offsets[row_idx] = offset;
}
@ -239,15 +241,20 @@ private:
size_t total_values = 0;
size_t pre_values = 0;
std::vector<size_t> row_length(input_rows_count);
for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
{
if (start < end_data[row_idx] && step_data[row_idx] == 0)
if (step_data[row_idx] == 0)
throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero",
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
pre_values += start >= end_data[row_idx] ? 0
: (end_data[row_idx] - start - 1) / step_data[row_idx] + 1;
if (start < end_data[row_idx] && step_data[row_idx] > 0)
row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) - 1) / static_cast<__int128_t>(step_data[row_idx]) + 1;
else if (start > end_data[row_idx] && step_data[row_idx] < 0)
row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) + 1) / static_cast<__int128_t>(step_data[row_idx]) + 1;
pre_values += row_length[row_idx];
if (pre_values < total_values)
throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
@ -269,15 +276,8 @@ private:
IColumn::Offset offset{};
for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
{
for (size_t st = start, ed = end_data[row_idx]; st < ed; st += step_data[row_idx])
{
out_data[offset++] = static_cast<T>(st);
if (st > st + step_data[row_idx])
throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
}
for (size_t idx = 0; idx < row_length[row_idx]; idx++)
out_data[offset++] = static_cast<T>(start + offset * step_data[row_idx]);
out_offsets[row_idx] = offset;
}
@ -301,15 +301,19 @@ private:
size_t total_values = 0;
size_t pre_values = 0;
std::vector<size_t> row_length(input_rows_count);
for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
{
if (start_data[row_idx] < end_start[row_idx] && step_data[row_idx] == 0)
throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero",
if (step_data[row_idx] == 0)
throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't less or equal to zero",
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
if (start_data[row_idx] < end_start[row_idx] && step_data[row_idx] > 0)
row_length[row_idx] = (static_cast<__int128_t>(end_start[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) - 1) / static_cast<__int128_t>(step_data[row_idx]) + 1;
else if (start_data[row_idx] > end_start[row_idx] && step_data[row_idx] < 0)
row_length[row_idx] = (static_cast<__int128_t>(end_start[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) + 1) / static_cast<__int128_t>(step_data[row_idx]) + 1;
pre_values += start_data[row_idx] >= end_start[row_idx] ? 0
: (end_start[row_idx] -start_data[row_idx] - 1) / (step_data[row_idx]) + 1;
pre_values += row_length[row_idx];
if (pre_values < total_values)
throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
@ -331,15 +335,8 @@ private:
IColumn::Offset offset{};
for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
{
for (size_t st = start_data[row_idx], ed = end_start[row_idx]; st < ed; st += step_data[row_idx])
{
out_data[offset++] = static_cast<T>(st);
if (st > st + step_data[row_idx])
throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
}
for (size_t idx = 0; idx < row_length[row_idx]; idx++)
out_data[offset++] = static_cast<T>(start_data[row_idx] + idx * step_data[row_idx]);
out_offsets[row_idx] = offset;
}
@ -351,23 +348,20 @@ private:
DataTypePtr elem_type = checkAndGetDataType<DataTypeArray>(result_type.get())->getNestedType();
WhichDataType which(elem_type);
if (!which.isUInt8()
&& !which.isUInt16()
&& !which.isUInt32()
&& !which.isUInt64())
if (!which.isNativeUInt() && !which.isNativeInt())
{
throw Exception{"Illegal columns of arguments of function " + getName()
+ ", the function only implemented for unsigned integers up to 64 bit", ErrorCodes::ILLEGAL_COLUMN};
+ ", the function only implemented for unsigned/signed integers up to 64 bit",
ErrorCodes::ILLEGAL_COLUMN};
}
ColumnPtr res;
if (arguments.size() == 1)
{
const auto * col = arguments[0].column.get();
if (!((res = executeInternal<UInt8>(col))
|| (res = executeInternal<UInt16>(col))
|| (res = executeInternal<UInt32>(col))
|| (res = executeInternal<UInt64>(col))))
if (!((res = executeInternal<UInt8>(col)) || (res = executeInternal<UInt16>(col)) || (res = executeInternal<UInt32>(col))
|| (res = executeInternal<UInt64>(col)) || (res = executeInternal<Int8>(col)) || (res = executeInternal<Int16>(col))
|| (res = executeInternal<Int32>(col)) || (res = executeInternal<Int64>(col))))
{
throw Exception{"Illegal column " + col->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
}
@ -402,44 +396,93 @@ private:
bool is_step_const = isColumnConst(*column_ptrs[2]);
if (is_start_const && is_step_const)
{
UInt64 start = assert_cast<const ColumnConst &>(*column_ptrs[0]).getUInt(0);
UInt64 step = assert_cast<const ColumnConst &>(*column_ptrs[2]).getUInt(0);
if ((res = executeConstStartStep<UInt8>(column_ptrs[1], start, step, input_rows_count)) ||
(res = executeConstStartStep<UInt16>(column_ptrs[1], start, step, input_rows_count)) ||
(res = executeConstStartStep<UInt32>(column_ptrs[1], static_cast<UInt32>(start), static_cast<UInt32>(step), input_rows_count)) ||
(res = executeConstStartStep<UInt64>(column_ptrs[1], start, step, input_rows_count)))
if (which.isNativeUInt())
{
UInt64 start = assert_cast<const ColumnConst &>(*column_ptrs[0]).getUInt(0);
UInt64 step = assert_cast<const ColumnConst &>(*column_ptrs[2]).getUInt(0);
if ((res = executeConstStartStep<UInt8>(column_ptrs[1], start, step, input_rows_count))
|| (res = executeConstStartStep<UInt16>(column_ptrs[1], start, step, input_rows_count))
|| (res = executeConstStartStep<UInt32>(
column_ptrs[1], static_cast<UInt32>(start), static_cast<UInt32>(step), input_rows_count))
|| (res = executeConstStartStep<UInt64>(column_ptrs[1], start, step, input_rows_count)))
{
}
}
else if (which.isNativeInt())
{
Int64 start = assert_cast<const ColumnConst &>(*column_ptrs[0]).getInt(0);
Int64 step = assert_cast<const ColumnConst &>(*column_ptrs[2]).getInt(0);
if ((res = executeConstStartStep<Int8>(column_ptrs[1], start, step, input_rows_count))
|| (res = executeConstStartStep<Int16>(column_ptrs[1], start, step, input_rows_count))
|| (res = executeConstStartStep<Int32>(
column_ptrs[1], static_cast<Int32>(start), static_cast<Int32>(step), input_rows_count))
|| (res = executeConstStartStep<Int64>(column_ptrs[1], start, step, input_rows_count)))
{
}
}
}
else if (is_start_const && !is_step_const)
{
UInt64 start = assert_cast<const ColumnConst &>(*column_ptrs[0]).getUInt(0);
if ((res = executeConstStart<UInt8>(column_ptrs[1], column_ptrs[2], start, input_rows_count)) ||
(res = executeConstStart<UInt16>(column_ptrs[1], column_ptrs[2], start, input_rows_count)) ||
(res = executeConstStart<UInt32>(column_ptrs[1], column_ptrs[2], static_cast<UInt32>(start), input_rows_count)) ||
(res = executeConstStart<UInt64>(column_ptrs[1], column_ptrs[2], start, input_rows_count)))
if (which.isNativeUInt())
{
UInt64 start = assert_cast<const ColumnConst &>(*column_ptrs[0]).getUInt(0);
if ((res = executeConstStart<UInt8>(column_ptrs[1], column_ptrs[2], start, input_rows_count))
|| (res = executeConstStart<UInt16>(column_ptrs[1], column_ptrs[2], start, input_rows_count))
|| (res = executeConstStart<UInt32>(column_ptrs[1], column_ptrs[2], static_cast<UInt32>(start), input_rows_count))
|| (res = executeConstStart<UInt64>(column_ptrs[1], column_ptrs[2], start, input_rows_count)))
{
}
}
else if (which.isNativeInt())
{
Int64 start = assert_cast<const ColumnConst &>(*column_ptrs[0]).getInt(0);
if ((res = executeConstStart<Int8>(column_ptrs[1], column_ptrs[2], start, input_rows_count))
|| (res = executeConstStart<Int16>(column_ptrs[1], column_ptrs[2], start, input_rows_count))
|| (res = executeConstStart<Int32>(column_ptrs[1], column_ptrs[2], static_cast<Int32>(start), input_rows_count))
|| (res = executeConstStart<Int64>(column_ptrs[1], column_ptrs[2], start, input_rows_count)))
{
}
}
}
else if (!is_start_const && is_step_const)
{
UInt64 step = assert_cast<const ColumnConst &>(*column_ptrs[2]).getUInt(0);
if ((res = executeConstStep<UInt8>(column_ptrs[0], column_ptrs[1], step, input_rows_count)) ||
(res = executeConstStep<UInt16>(column_ptrs[0], column_ptrs[1], step, input_rows_count)) ||
(res = executeConstStep<UInt32>(column_ptrs[0], column_ptrs[1], static_cast<UInt32>(step), input_rows_count)) ||
(res = executeConstStep<UInt64>(column_ptrs[0], column_ptrs[1], step, input_rows_count)))
if (which.isNativeUInt())
{
UInt64 step = assert_cast<const ColumnConst &>(*column_ptrs[2]).getUInt(0);
if ((res = executeConstStep<UInt8>(column_ptrs[0], column_ptrs[1], step, input_rows_count))
|| (res = executeConstStep<UInt16>(column_ptrs[0], column_ptrs[1], step, input_rows_count))
|| (res = executeConstStep<UInt32>(column_ptrs[0], column_ptrs[1], static_cast<UInt32>(step), input_rows_count))
|| (res = executeConstStep<UInt64>(column_ptrs[0], column_ptrs[1], step, input_rows_count)))
{
}
}
else if (which.isNativeInt())
{
Int64 step = assert_cast<const ColumnConst &>(*column_ptrs[2]).getInt(0);
if ((res = executeConstStep<Int8>(column_ptrs[0], column_ptrs[1], step, input_rows_count))
|| (res = executeConstStep<Int16>(column_ptrs[0], column_ptrs[1], step, input_rows_count))
|| (res = executeConstStep<Int32>(column_ptrs[0], column_ptrs[1], static_cast<Int32>(step), input_rows_count))
|| (res = executeConstStep<Int64>(column_ptrs[0], column_ptrs[1], step, input_rows_count)))
{
}
}
}
else
{
if ((res = executeGeneric<UInt8>(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) ||
(res = executeGeneric<UInt16>(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) ||
(res = executeGeneric<UInt32>(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) ||
(res = executeGeneric<UInt64>(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)))
if ((res = executeGeneric<UInt8>(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count))
|| (res = executeGeneric<UInt16>(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count))
|| (res = executeGeneric<UInt32>(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count))
|| (res = executeGeneric<UInt64>(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count))
|| (res = executeGeneric<Int8>(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count))
|| (res = executeGeneric<Int16>(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count))
|| (res = executeGeneric<Int32>(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count))
|| (res = executeGeneric<Int64>(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)))
{
}
}

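As a concrete illustration of the new row_length formula (assuming the semantics shown above), range(5, -5, -2) should now produce [5, 3, 1, -1, -3]: the length is (-5 - 5 + 1) / (-2) + 1 = 5 and each element is start + idx * step. A zero step is also rejected for every row now, not only when start < end as before.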
View File

@ -61,7 +61,7 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
{
size_t current_columns_number = columns_number++;
size_t current_columns_number = columns_number.fetch_add(1, std::memory_order_relaxed);
return ColumnUInt64::create(input_rows_count, current_columns_number);
}
};

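The change from the post-increment to an explicit fetch_add(1, std::memory_order_relaxed) should not alter behaviour: both are atomic read-modify-write operations on the (presumably std::atomic) counter, and the relaxed ordering documents that the value is only used to hand out distinct numbers, with no synchronization with other memory accesses required.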
View File

@ -1,6 +1,7 @@
/// This translation unit should be compiled multiple times
/// with different values of NAMESPACE and machine flags (sse2, avx2).
/// See also <libdivide-config.h>
#if defined(__AVX2__)
#define REG_SIZE 32
#define LIBDIVIDE_AVX2

View File

@ -1,16 +1,7 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionBinaryArithmetic.h>
#if defined(__SSE2__)
# define LIBDIVIDE_SSE2
#elif defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX512VL__)
# define LIBDIVIDE_AVX512
#elif defined(__AVX2__)
# define LIBDIVIDE_AVX2
#elif defined(__aarch64__) && defined(__ARM_NEON)
# define LIBDIVIDE_NEON
#endif
#include <libdivide-config.h>
#include <libdivide.h>

53 src/IO/IResourceManager.h Normal file
View File

@ -0,0 +1,53 @@
#pragma once
#include <IO/ResourceRequest.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <boost/noncopyable.hpp>
#include <memory>
#include <unordered_map>
namespace DB
{
/*
* Instance of derived class holds everything required for resource consumption,
* including resources currently registered at `SchedulerRoot`. This is required to avoid
* problems during configuration update. Do not hold instances longer than required.
* Should be created on query start and destructed when query is done.
*/
class IClassifier : private boost::noncopyable
{
public:
virtual ~IClassifier() {}
/// Returns the ResourceLink that should be used to access the resource.
/// Returned link is valid until classifier destruction.
virtual ResourceLink get(const String & resource_name) = 0;
};
using ClassifierPtr = std::shared_ptr<IClassifier>;
/*
* Represents control plane of resource scheduling. Derived class is responsible for reading
* configuration, creating all required `ISchedulerNode` objects and
* managing their lifespan.
*/
class IResourceManager : private boost::noncopyable
{
public:
virtual ~IResourceManager() {}
/// Initialize or reconfigure manager.
virtual void updateConfiguration(const Poco::Util::AbstractConfiguration & config) = 0;
/// Obtain a classifier instance required to get access to resources.
/// Note that it holds resource configuration, so should be destructed when query is done.
virtual ClassifierPtr acquire(const String & classifier_name) = 0;
};
using ResourceManagerPtr = std::shared_ptr<IResourceManager>;
}

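A rough sketch of the intended call pattern, combining IResourceManager with the ResourceGuard added later in this commit (the classifier and resource names are hypothetical, and `server_config` stands for the server's Poco configuration):

    // At startup / on config load: pick a manager implementation and feed it the config.
    ResourceManagerPtr manager = ResourceManagerFactory::instance().get("static");
    manager->updateConfiguration(server_config);

    // On query start: a classifier maps resource names to scheduler queues.
    ClassifierPtr classifier = manager->acquire("my_workload");
    ResourceLink link = classifier->get("network_read");

    // Around the actual consumption: blocks until the scheduler dequeues the request.
    {
        ResourceGuard guard(link, /* cost = */ 1);
        // ... perform the throttled operation here ...
    }   // consumption is reported as finished when the guard is destroyed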
View File

@ -0,0 +1,55 @@
#pragma once
#include <IO/ISchedulerNode.h>
namespace DB
{
/*
* Constraint defined on the set of requests in consumption state.
* It allows two events to be tracked:
* - dequeueRequest(): resource consumption begins
* - finishRequest(): resource consumption finishes
* This makes it possible to keep track of in-flight requests and to implement different constraints (e.g. an in-flight limit).
* When constraint is violated, node must be deactivated by dequeueRequest() returning `false`.
* When constraint is again satisfied, scheduleActivation() is called from finishRequest().
*
* Derived class behaviour requirements:
* - dequeueRequest() must fill `request->constraint` iff it is nullptr;
* - finishRequest() must be recursive: call to `parent_constraint->finishRequest()`.
*/
class ISchedulerConstraint : public ISchedulerNode
{
public:
ISchedulerConstraint(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
: ISchedulerNode(event_queue_, config, config_prefix)
{}
/// Resource consumption by `request` is finished.
/// Should be called outside of scheduling subsystem, implementation must be thread-safe.
virtual void finishRequest(ResourceRequest * request) = 0;
void setParent(ISchedulerNode * parent_) override
{
ISchedulerNode::setParent(parent_);
// Assign `parent_constraint` to the nearest parent derived from ISchedulerConstraint
for (ISchedulerNode * node = parent_; node != nullptr; node = node->parent)
{
if (auto * constraint = dynamic_cast<ISchedulerConstraint *>(node))
{
parent_constraint = constraint;
break;
}
}
}
protected:
// Reference to nearest parent that is also derived from ISchedulerConstraint.
// Request can traverse through multiple constraints while being dequeue from hierarchy,
// while finishing request should traverse the same chain in reverse order.
// NOTE: it must be immutable after initialization, because it is accessed in a non-thread-safe way from finishRequest()
ISchedulerConstraint * parent_constraint = nullptr;
};
}

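Concretely, in this commit the pairing works as follows: SemaphoreConstraint::dequeueRequest() (below) fills request->constraint for the constraint closest to the leaf, and when the consumer is done, ResourceGuard::unlock() calls request->constraint->finishRequest(), which recurses through parent_constraint so that every constraint on the path updates its in-flight accounting.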
221 src/IO/ISchedulerNode.h Normal file
View File

@ -0,0 +1,221 @@
#pragma once
#include <Common/ErrorCodes.h>
#include <Common/Exception.h>
#include <IO/ResourceRequest.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Poco/Util/XMLConfiguration.h>
#include <boost/noncopyable.hpp>
#include <deque>
#include <functional>
#include <memory>
#include <mutex>
namespace DB
{
namespace ErrorCodes
{
extern const int INVALID_SCHEDULER_NODE;
}
class ISchedulerNode;
inline const Poco::Util::AbstractConfiguration & emptyConfig()
{
static Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration();
return *config;
}
/*
* Info read and write for scheduling purposes by parent
*/
struct SchedulerNodeInfo
{
double weight = 1.0; /// Weight of this node among its siblings
Int64 priority = 0; /// Priority of this node among its siblings (higher value means higher priority)
/// Arbitrary data accessed/stored by parent
union {
size_t idx;
void * ptr;
} parent;
SchedulerNodeInfo() = default;
explicit SchedulerNodeInfo(const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
{
setWeight(config.getDouble(config_prefix + ".weight", weight));
setPriority(config.getInt64(config_prefix + ".priority", priority));
}
void setWeight(double value)
{
if (value <= 0 || !isfinite(value))
throw Exception(
ErrorCodes::INVALID_SCHEDULER_NODE,
"Negative and non-finite node weights are not allowed: {}",
value);
weight = value;
}
void setPriority(Int64 value)
{
priority = value;
}
};
/*
* Simple waitable thread-safe FIFO task queue.
* Intended to hold postponed events for later handling (usually by scheduler thread).
*/
class EventQueue
{
public:
using Event = std::function<void()>;
void enqueue(Event&& event)
{
std::unique_lock lock{mutex};
bool was_empty = queue.empty();
queue.emplace_back(event);
if (was_empty)
pending.notify_one();
}
/// Process single event if it exists
/// Returns `true` iff event has been processed
bool tryProcess()
{
std::unique_lock lock{mutex};
if (queue.empty())
return false;
Event event = std::move(queue.front());
queue.pop_front();
lock.unlock(); // do not hold the queue mutex while processing events
event();
return true;
}
/// Wait for single event (if not available) and process it
void process()
{
std::unique_lock lock{mutex};
pending.wait(lock, [&] { return !queue.empty(); });
Event event = std::move(queue.front());
queue.pop_front();
lock.unlock(); // do not hold the queue mutex while processing events
event();
}
private:
std::mutex mutex;
std::condition_variable pending;
std::deque<Event> queue;
};
/*
* Node of hierarchy for scheduling requests for resource. Base class for all
* kinds of scheduling elements (queues, policies, constraints and schedulers).
*
* Root node is a scheduler, which has its own thread to dequeue requests,
* execute requests (see ResourceRequest) and process events in a thread-safe manner.
* Immediate children of the scheduler represent independent resources.
* Each resource has its own hierarchy to achieve the required scheduling policies.
* Non-leaf nodes do not hold requests, but keep scheduling state
* (e.g. consumption history, amount of in-flight requests, etc).
* Leaves of the hierarchy are queues capable of holding pending requests.
*
* scheduler (SchedulerRoot)
* / \
* constraint constraint (SemaphoreConstraint)
* | |
* policy policy (PriorityPolicy)
* / \ / \
* q1 q2 q3 q4 (FifoQueue)
*
* Dequeueing request from an inner node will dequeue request from one of active leaf-queues in its subtree.
* Node is considered to be active iff:
* - it has at least one pending request in one of the leaves of its subtree;
* - and enforced constraints, if any, are satisfied
* (e.g. amount of concurrent requests is not greater than some number).
*
* All methods must be called only from scheduler thread for thread-safety.
*/
class ISchedulerNode : private boost::noncopyable
{
public:
ISchedulerNode(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
: event_queue(event_queue_)
, info(config, config_prefix)
{}
virtual ~ISchedulerNode() {}
// Checks if the configuration of two nodes is equal
virtual bool equals(ISchedulerNode * other) = 0;
/// Attach new child
virtual void attachChild(const std::shared_ptr<ISchedulerNode> & child) = 0;
/// Detach and destroy child
virtual void removeChild(ISchedulerNode * child) = 0;
/// Get attached child by name
virtual ISchedulerNode * getChild(const String & child_name) = 0;
/// Activation of child due to the first pending request
/// Should be called on leaf node (i.e. queue) to propagate activation signal through chain to the root
virtual void activateChild(ISchedulerNode * child) = 0;
/// Returns true iff node is active
virtual bool isActive() = 0;
/// Returns the first request to be executed as the first component of the resulting pair.
/// The second pair component is `true` iff node is still active after dequeueing.
virtual std::pair<ResourceRequest *, bool> dequeueRequest() = 0;
/// Returns full path string using names of every parent
String getPath()
{
String result;
ISchedulerNode * ptr = this;
while (ptr->parent)
{
result = "/" + ptr->basename + result;
ptr = ptr->parent;
}
return result.empty() ? "/" : result;
}
/// Attach to a parent (used by attachChild)
virtual void setParent(ISchedulerNode * parent_)
{
parent = parent_;
}
protected:
/// Notify parents about the first pending request or constraint becoming satisfied.
/// Postponed to be handled in scheduler thread, so it is intended to be called from outside.
void scheduleActivation()
{
if (likely(parent))
{
event_queue->enqueue([this] { parent->activateChild(this); });
}
}
public:
EventQueue * const event_queue;
String basename;
SchedulerNodeInfo info;
ISchedulerNode * parent = nullptr;
};
using SchedulerNodePtr = std::shared_ptr<ISchedulerNode>;
}

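To make the hierarchy concrete, here is a small sketch (not part of the commit) that wires a PriorityPolicy root with two FifoQueue leaves onto one EventQueue and pulls a request back out; both node types are added later in this commit, and DummyRequest is a stand-in defined only for the example (as in the test helpers below, only execute() needs to be overridden):

    // Trivial request type for the sketch.
    struct DummyRequest : ResourceRequest
    {
        using ResourceRequest::ResourceRequest;
        void execute() override {}
    };

    EventQueue events;
    auto root = std::make_shared<PriorityPolicy>(&events);
    auto q_hi = std::make_shared<FifoQueue>(&events, emptyConfig(), "");
    auto q_lo = std::make_shared<FifoQueue>(&events, emptyConfig(), "");
    q_hi->basename = "hi";
    q_lo->basename = "lo";
    root->attachChild(q_hi);
    root->attachChild(q_lo);

    DummyRequest request(/* cost = */ 1);
    q_hi->enqueueRequest(&request);   // posts an activation event for the parent
    while (events.tryProcess()) {}    // deliver activateChild() up to the root

    auto [next, still_active] = root->dequeueRequest();   // next == &request, still_active == false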
26 src/IO/ISchedulerQueue.h Normal file
View File

@ -0,0 +1,26 @@
#pragma once
#include <IO/ISchedulerNode.h>
#include <memory>
namespace DB
{
/*
* Queue for pending requests for specific resource, leaf of hierarchy.
*/
class ISchedulerQueue : public ISchedulerNode
{
public:
ISchedulerQueue(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
: ISchedulerNode(event_queue_, config, config_prefix)
{}
/// Enqueue new request to be executed using underlying resource.
/// Should be called outside of scheduling subsystem, implementation must be thread-safe.
virtual void enqueueRequest(ResourceRequest * request) = 0;
};
}

View File

@ -0,0 +1,40 @@
#include <IO/Resource/ClassifiersConfig.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int RESOURCE_NOT_FOUND;
}
ClassifierDescription::ClassifierDescription(const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
{
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_prefix, keys);
for (const auto & key : keys)
emplace(key, config.getString(config_prefix + "." + key));
}
ClassifiersConfig::ClassifiersConfig(const Poco::Util::AbstractConfiguration & config)
{
Poco::Util::AbstractConfiguration::Keys keys;
const String config_prefix = "classifiers";
config.keys(config_prefix, keys);
for (const auto & key : keys)
classifiers.emplace(std::piecewise_construct,
std::forward_as_tuple(key),
std::forward_as_tuple(config, config_prefix + "." + key));
}
const ClassifierDescription & ClassifiersConfig::get(const String & classifier_name)
{
if (auto it = classifiers.find(classifier_name); it != classifiers.end())
return it->second;
else
throw Exception(ErrorCodes::RESOURCE_NOT_FOUND, "Unknown classifier '{}' to access resources", classifier_name);
}
}

View File

@ -0,0 +1,39 @@
#pragma once
#include <base/types.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <unordered_map>
namespace DB
{
/// Mapping of resource name into path string (e.g. "disk1" -> "/path/to/class")
struct ClassifierDescription : std::unordered_map<String, String>
{
ClassifierDescription(const Poco::Util::AbstractConfiguration & config, const String & config_prefix);
};
/*
* Loads a config with the following format:
* <classifiers>
* <classifier1>
* <resource1>/path/to/queue</resource1>
* <resource2>/path/to/another/queue</resource2>
* </classifier1>
* ...
* <classifierN>...</classifierN>
* </classifiers>
*/
class ClassifiersConfig
{
public:
ClassifiersConfig() = default;
explicit ClassifiersConfig(const Poco::Util::AbstractConfiguration & config);
const ClassifierDescription & get(const String & classifier_name);
private:
std::unordered_map<String, ClassifierDescription> classifiers; // by classifier_name
};
}

View File

@ -0,0 +1,13 @@
#include <IO/Resource/FifoQueue.h>
#include <IO/SchedulerNodeFactory.h>
namespace DB
{
void registerFifoQueue(SchedulerNodeFactory & factory)
{
factory.registerMethod<FifoQueue>("fifo");
}
}

View File

@ -0,0 +1,91 @@
#pragma once
#include <Common/Stopwatch.h>
#include <IO/ISchedulerQueue.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <deque>
#include <mutex>
namespace DB
{
namespace ErrorCodes
{
extern const int INVALID_SCHEDULER_NODE;
}
/*
* FIFO queue to hold pending resource requests
*/
class FifoQueue : public ISchedulerQueue
{
public:
FifoQueue(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
: ISchedulerQueue(event_queue_, config, config_prefix)
{}
bool equals(ISchedulerNode * other) override
{
if (auto * o = dynamic_cast<FifoQueue *>(other))
return true;
return false;
}
void enqueueRequest(ResourceRequest * request) override
{
std::unique_lock lock(mutex);
request->enqueue_ns = clock_gettime_ns();
bool was_empty = requests.empty();
requests.push_back(request);
if (was_empty)
scheduleActivation();
}
std::pair<ResourceRequest *, bool> dequeueRequest() override
{
std::unique_lock lock(mutex);
if (requests.empty())
return {nullptr, false};
ResourceRequest * result = requests.front();
requests.pop_front();
return {result, !requests.empty()};
}
bool isActive() override
{
std::unique_lock lock(mutex);
return !requests.empty();
}
void activateChild(ISchedulerNode *) override
{
assert(false); // queue cannot have children
}
void attachChild(const SchedulerNodePtr &) override
{
throw Exception(
ErrorCodes::INVALID_SCHEDULER_NODE,
"Cannot add child to leaf scheduler queue: {}",
getPath());
}
void removeChild(ISchedulerNode *) override
{
}
ISchedulerNode * getChild(const String &) override
{
return nullptr;
}
private:
std::mutex mutex;
std::deque<ResourceRequest *> requests;
};
}

View File

@ -0,0 +1,13 @@
#include <IO/Resource/PriorityPolicy.h>
#include <IO/SchedulerNodeFactory.h>
namespace DB
{
void registerPriorityPolicy(SchedulerNodeFactory & factory)
{
factory.registerMethod<PriorityPolicy>("priority");
}
}

View File

@ -0,0 +1,143 @@
#pragma once
#include <IO/ISchedulerQueue.h>
#include <IO/SchedulerRoot.h>
#include <algorithm>
#include <unordered_map>
#include <vector>
namespace DB
{
namespace ErrorCodes
{
extern const int INVALID_SCHEDULER_NODE;
}
/*
* Scheduler node that implements priority scheduling policy.
* Requests are scheduled in order of priorities.
*/
class PriorityPolicy : public ISchedulerNode
{
/// Scheduling state of a child
struct Item
{
ISchedulerNode * child = nullptr;
Int64 priority = 0; // higher value means higher priority
/// For max-heap by priority
bool operator<(const Item& rhs) const noexcept
{
return priority < rhs.priority;
}
};
public:
PriorityPolicy(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
: ISchedulerNode(event_queue_, config, config_prefix)
{}
bool equals(ISchedulerNode * other) override
{
if (auto * o = dynamic_cast<PriorityPolicy *>(other))
return true;
return false;
}
void attachChild(const SchedulerNodePtr & child) override
{
// Take ownership
chassert(child->parent == nullptr);
if (auto [it, inserted] = children.emplace(child->basename, child); !inserted)
throw Exception(
ErrorCodes::INVALID_SCHEDULER_NODE,
"Can't add another child with the same path: {}",
it->second->getPath());
// Attach
child->setParent(this);
// Activate child if it is not empty
if (child->isActive())
activateChild(child.get());
}
void removeChild(ISchedulerNode * child) override
{
if (auto iter = children.find(child->basename); iter != children.end())
{
SchedulerNodePtr removed = iter->second;
// Deactivate: detach is not a very common operation, so we can afford O(N) here
for (auto i = items.begin(), e = items.end(); i != e; ++i)
{
if (i->child == removed.get())
{
items.erase(i);
// Element was removed from inside of heap -- heap must be rebuilt
std::make_heap(items.begin(), items.end());
break;
}
}
// Detach
removed->setParent(nullptr);
// Get rid of ownership
children.erase(iter);
}
}
ISchedulerNode * getChild(const String & child_name) override
{
if (auto iter = children.find(child_name); iter != children.end())
return iter->second.get();
else
return nullptr;
}
std::pair<ResourceRequest *, bool> dequeueRequest() override
{
if (items.empty())
return {nullptr, false};
// Recursively pull request from child
auto [request, child_active] = items.front().child->dequeueRequest();
assert(request != nullptr);
// Deactivate child if it is empty
if (!child_active)
{
std::pop_heap(items.begin(), items.end());
items.pop_back();
}
return {request, !items.empty()};
}
bool isActive() override
{
return !items.empty();
}
void activateChild(ISchedulerNode * child) override
{
bool activate_parent = items.empty();
items.emplace_back(Item{child, child->info.priority});
std::push_heap(items.begin(), items.end());
if (activate_parent && parent)
parent->activateChild(this);
}
private:
/// Heap of active children
std::vector<Item> items;
/// All children with ownership
std::unordered_map<String, SchedulerNodePtr> children; // basename -> child
};
}

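Because Item::operator< compares priorities, the std::push_heap/std::pop_heap calls keep the active child with the highest priority at items.front(), so dequeueRequest() drains higher-priority queues before lower-priority ones; the IOResourcePriorityPolicy.Priorities test added below exercises exactly this ordering.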
View File

@ -0,0 +1,13 @@
#include <IO/Resource/SemaphoreConstraint.h>
#include <IO/SchedulerNodeFactory.h>
namespace DB
{
void registerSemaphoreConstraint(SchedulerNodeFactory & factory)
{
factory.registerMethod<SemaphoreConstraint>("inflight_limit");
}
}

View File

@ -0,0 +1,138 @@
#pragma once
#include <IO/ISchedulerConstraint.h>
#include <IO/SchedulerRoot.h>
#include <mutex>
#include <limits>
#include <utility>
namespace DB
{
/*
* Limited concurrency constraint.
* Blocks if either the number of concurrent in-flight requests exceeds `max_requests` or their total cost exceeds `max_cost`.
*/
class SemaphoreConstraint : public ISchedulerConstraint
{
static constexpr Int64 default_max_requests = std::numeric_limits<Int64>::max();
static constexpr Int64 default_max_cost = std::numeric_limits<Int64>::max();
public:
SemaphoreConstraint(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
: ISchedulerConstraint(event_queue_, config, config_prefix)
, max_requests(config.getInt64(config_prefix + ".max_requests", default_max_requests))
, max_cost(config.getInt64(config_prefix + ".max_cost", config.getInt64(config_prefix + ".max_bytes", default_max_cost)))
{}
bool equals(ISchedulerNode * other) override
{
if (auto * o = dynamic_cast<SemaphoreConstraint *>(other))
return max_requests == o->max_requests && max_cost == o->max_cost;
return false;
}
void attachChild(const std::shared_ptr<ISchedulerNode> & child_) override
{
// Take ownership
child = child_;
child->setParent(this);
// Activate if required
if (child->isActive())
activateChild(child.get());
}
void removeChild(ISchedulerNode * child_) override
{
if (child.get() == child_)
{
child_active = false; // deactivate
child->setParent(nullptr); // detach
child.reset();
}
}
ISchedulerNode * getChild(const String & child_name) override
{
if (child->basename == child_name)
return child.get();
else
return nullptr;
}
std::pair<ResourceRequest *, bool> dequeueRequest() override
{
// Dequeue request from the child
auto [request, child_now_active] = child->dequeueRequest();
if (!request)
return {nullptr, false};
// Request has reference to the first (closest to leaf) `constraint`, which can have `parent_constraint`.
// The former is initialized here dynamically and the latter is initialized once during hierarchy construction.
if (!request->constraint)
request->constraint = this;
// Update state on request arrival
std::unique_lock lock(mutex);
requests++;
cost += request->cost;
child_active = child_now_active;
return {request, active()};
}
void finishRequest(ResourceRequest * request) override
{
// Recursive traverse of parent flow controls in reverse order
if (parent_constraint)
parent_constraint->finishRequest(request);
// Update state on request departure
std::unique_lock lock(mutex);
bool was_active = active();
requests--;
cost -= request->cost;
// Schedule activation on transition from inactive state
if (!was_active && active())
scheduleActivation();
}
void activateChild(ISchedulerNode * child_) override
{
std::unique_lock lock(mutex);
if (child_ == child.get())
if (!std::exchange(child_active, true) && satisfied() && parent)
parent->activateChild(this);
}
bool isActive() override
{
std::unique_lock lock(mutex);
return active();
}
private:
bool satisfied() const
{
return requests < max_requests && cost < max_cost;
}
bool active() const
{
return satisfied() && child_active;
}
private:
std::mutex mutex;
Int64 requests = 0;
Int64 cost = 0;
bool child_active = false;
SchedulerNodePtr child;
Int64 max_requests = default_max_requests;
Int64 max_cost = default_max_cost;
};
}

View File

@ -0,0 +1,138 @@
#include <IO/Resource/StaticResourceManager.h>
#include <IO/SchedulerNodeFactory.h>
#include <IO/ResourceManagerFactory.h>
#include <IO/ISchedulerQueue.h>
#include <Common/Exception.h>
#include <Common/StringUtils/StringUtils.h>
#include <map>
#include <tuple>
#include <algorithm>
namespace DB
{
namespace ErrorCodes
{
extern const int RESOURCE_ACCESS_DENIED;
extern const int RESOURCE_NOT_FOUND;
extern const int INVALID_SCHEDULER_NODE;
}
StaticResourceManager::Resource::Resource(
const String & name,
EventQueue * event_queue,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix)
{
// Initialize scheduler nodes
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_prefix, keys);
std::sort(keys.begin(), keys.end()); // for parents to appear before children
for (const auto & key : keys)
{
if (!startsWith(key, "node"))
continue;
// Validate path
String path = config.getString(config_prefix + "." + key + "[@path]", "");
if (path.empty())
throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Attribute 'path' must be specified in all nodes for resource '{}'", name);
if (path[0] != '/')
throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "path must start with '/' for resource '{}'", name);
// Create node
String type = config.getString(config_prefix + "." + key + ".type", "fifo");
SchedulerNodePtr node = SchedulerNodeFactory::instance().get(type, event_queue, config, config_prefix + "." + key);
node->basename = path.substr(1);
// Take ownership
if (auto [_, inserted] = nodes.emplace(path, node); !inserted)
throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Duplicate path '{}' for resource '{}'", path, name);
// Attach created node to parent (if not root)
if (path != "/")
{
String parent_path = path.substr(0, path.rfind('/'));
if (parent_path.empty())
parent_path = "/";
if (auto parent = nodes.find(parent_path); parent != nodes.end())
parent->second->attachChild(node);
else
throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Parent doesn't exist for path '{}' for resource '{}'", path, name);
}
}
if (nodes.find("/") == nodes.end())
throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "undefined root node path '/' for resource '{}'", name);
}
StaticResourceManager::Classifier::Classifier(const StaticResourceManager & manager, const ClassifierDescription & cfg)
{
for (auto [resource_name, path] : cfg)
{
if (auto resource_iter = manager.resources.find(resource_name); resource_iter != manager.resources.end())
{
const Resource & resource = resource_iter->second;
if (auto node_iter = resource.nodes.find(path); node_iter != resource.nodes.end())
{
if (auto * queue = dynamic_cast<ISchedulerQueue *>(node_iter->second.get()))
resources.emplace(resource_name, ResourceLink{.queue = queue});
else
throw Exception(ErrorCodes::RESOURCE_NOT_FOUND, "Unable to access non-queue node at path '{}' for resource '{}'", path, resource_name);
}
else
throw Exception(ErrorCodes::RESOURCE_NOT_FOUND, "Path '{}' for resource '{}' does not exist", path, resource_name);
}
else
resources.emplace(resource_name, ResourceLink{}); // resource not configured - unlimited
}
}
ResourceLink StaticResourceManager::Classifier::get(const String & resource_name)
{
if (auto iter = resources.find(resource_name); iter != resources.end())
return iter->second;
else
throw Exception(ErrorCodes::RESOURCE_ACCESS_DENIED, "Access denied to resource '{}'", resource_name);
}
void StaticResourceManager::updateConfiguration(const Poco::Util::AbstractConfiguration & config)
{
if (!resources.empty())
return; // already initialized, configuration update is not supported
Poco::Util::AbstractConfiguration::Keys keys;
const String config_prefix = "resources";
config.keys(config_prefix, keys);
// Create resource for every element under <resources> tag
for (const auto & key : keys)
{
auto [iter, _] = resources.emplace(std::piecewise_construct,
std::forward_as_tuple(key),
std::forward_as_tuple(key, scheduler.event_queue, config, config_prefix + "." + key));
// Attach root of resource to scheduler
scheduler.attachChild(iter->second.nodes.find("/")->second);
}
// Initialize classifiers
classifiers = std::make_unique<ClassifiersConfig>(config);
// Run scheduler thread
scheduler.start();
}
ClassifierPtr StaticResourceManager::acquire(const String & classifier_name)
{
return std::make_shared<Classifier>(*this, classifiers->get(classifier_name));
}
void registerStaticResourceManager(ResourceManagerFactory & factory)
{
factory.registerMethod<StaticResourceManager>("static");
}
}

View File

@ -0,0 +1,49 @@
#pragma once
#include <IO/IResourceManager.h>
#include <IO/SchedulerRoot.h>
#include <IO/Resource/ClassifiersConfig.h>
#include <mutex>
namespace DB
{
/*
* Reads `<resources>` from the config at startup and registers them in a single `SchedulerRoot`.
* Does not support configuration updates; a server restart is required.
*/
class StaticResourceManager : public IResourceManager
{
public:
// Just initialization, any further updates are ignored for the sake of simplicity
// NOTE: manager must be initialized before any acquire() calls to avoid races
void updateConfiguration(const Poco::Util::AbstractConfiguration & config) override;
ClassifierPtr acquire(const String & classifier_name) override;
private:
struct Resource
{
std::unordered_map<String, SchedulerNodePtr> nodes; // by paths
Resource(
const String & name,
EventQueue * event_queue,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix);
};
struct Classifier : public IClassifier
{
Classifier(const StaticResourceManager & manager, const ClassifierDescription & cfg);
ResourceLink get(const String & resource_name) override;
std::unordered_map<String, ResourceLink> resources; // accessible resources by names
};
SchedulerRoot scheduler;
std::unordered_map<String, Resource> resources; // by name
std::unique_ptr<ClassifiersConfig> classifiers;
};
}

View File

@ -0,0 +1,15 @@
#include <IO/Resource/registerResourceManagers.h>
#include <IO/ResourceManagerFactory.h>
namespace DB
{
void registerStaticResourceManager(ResourceManagerFactory &);
void registerResourceManagers()
{
auto & factory = ResourceManagerFactory::instance();
registerStaticResourceManager(factory);
}
}

View File

@ -0,0 +1,8 @@
#pragma once
namespace DB
{
void registerResourceManagers();
}

View File

@ -0,0 +1,28 @@
#include <IO/Resource/registerSchedulerNodes.h>
#include <IO/ISchedulerNode.h>
#include <IO/ISchedulerConstraint.h>
#include <IO/SchedulerNodeFactory.h>
namespace DB
{
void registerPriorityPolicy(SchedulerNodeFactory &);
void registerSemaphoreConstraint(SchedulerNodeFactory &);
void registerFifoQueue(SchedulerNodeFactory &);
void registerSchedulerNodes()
{
auto & factory = SchedulerNodeFactory::instance();
// ISchedulerNode
registerPriorityPolicy(factory);
// ISchedulerConstraint
registerSemaphoreConstraint(factory);
// ISchedulerQueue
registerFifoQueue(factory);
}
}

View File

@ -0,0 +1,8 @@
#pragma once
namespace DB
{
void registerSchedulerNodes();
}

View File

@ -0,0 +1,309 @@
#pragma once
#include <IO/IResourceManager.h>
#include <IO/SchedulerRoot.h>
#include <IO/ResourceGuard.h>
#include <IO/SchedulerNodeFactory.h>
#include <IO/Resource/PriorityPolicy.h>
#include <IO/Resource/FifoQueue.h>
#include <IO/Resource/SemaphoreConstraint.h>
#include <IO/Resource/registerSchedulerNodes.h>
#include <IO/Resource/registerResourceManagers.h>
#include <Poco/Util/XMLConfiguration.h>
#include <atomic>
#include <barrier>
#include <unordered_map>
#include <mutex>
#include <set>
#include <sstream>
namespace DB
{
struct ResourceTestBase
{
ResourceTestBase()
{
[[maybe_unused]] static bool typesRegistered = [] { registerSchedulerNodes(); registerResourceManagers(); return true; }();
}
template <class TClass>
static TClass * add(EventQueue * event_queue, SchedulerNodePtr & root_node, const String & path, const String & xml = {})
{
std::stringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
stream << "<resource><node path=\"" << path << "\">" << xml << "</node></resource>";
Poco::AutoPtr config{new Poco::Util::XMLConfiguration(stream)};
String config_prefix = "node";
if (path == "/")
{
EXPECT_TRUE(root_node.get() == nullptr);
root_node.reset(new TClass(event_queue, *config, config_prefix));
return static_cast<TClass *>(root_node.get());
}
EXPECT_TRUE(root_node.get() != nullptr); // root should be initialized first
ISchedulerNode * parent = root_node.get();
size_t pos = 1;
String child_name;
while (pos < path.length())
{
size_t slash = path.find('/', pos);
if (slash != String::npos)
{
parent = parent->getChild(path.substr(pos, slash - pos));
EXPECT_TRUE(parent != nullptr); // parent does not exist
pos = slash + 1;
}
else
{
child_name = path.substr(pos);
pos = String::npos;
}
}
EXPECT_TRUE(!child_name.empty()); // wrong path
SchedulerNodePtr node = std::make_shared<TClass>(event_queue, *config, config_prefix);
node->basename = child_name;
parent->attachChild(node);
return static_cast<TClass *>(node.get());
}
};
struct ConstraintTest : public SemaphoreConstraint
{
ConstraintTest(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
: SemaphoreConstraint(event_queue_, config, config_prefix)
{}
std::pair<ResourceRequest *, bool> dequeueRequest() override
{
auto [request, active] = SemaphoreConstraint::dequeueRequest();
if (request)
{
std::unique_lock lock(mutex);
requests.insert(request);
}
return {request, active};
}
void finishRequest(ResourceRequest * request) override
{
{
std::unique_lock lock(mutex);
requests.erase(request);
}
SemaphoreConstraint::finishRequest(request);
}
std::mutex mutex;
std::set<ResourceRequest *> requests;
};
class ResourceTestClass : public ResourceTestBase
{
struct Request : public ResourceRequest
{
String name;
Request(ResourceCost cost_, const String & name_)
: ResourceRequest(cost_)
, name(name_)
{}
void execute() override
{
}
};
public:
template <class TClass>
void add(const String & path, const String & xml = {})
{
ResourceTestBase::add<TClass>(&event_queue, root_node, path, xml);
}
void enqueue(const String & path, const std::vector<ResourceCost> & costs)
{
ASSERT_TRUE(root_node.get() != nullptr); // root should be initialized first
ISchedulerNode * node = root_node.get();
size_t pos = 1;
while (pos < path.length())
{
size_t slash = path.find('/', pos);
if (slash != String::npos)
{
node = node->getChild(path.substr(pos, slash - pos));
ASSERT_TRUE(node != nullptr); // does not exist
pos = slash + 1;
}
else
{
node = node->getChild(path.substr(pos));
pos = String::npos;
}
}
ISchedulerQueue * queue = dynamic_cast<ISchedulerQueue *>(node);
ASSERT_TRUE(queue != nullptr); // not a queue
for (ResourceCost cost : costs)
{
queue->enqueueRequest(new Request(cost, queue->basename));
}
processEvents(); // to activate queues
}
void dequeue(size_t count_limit = size_t(-1), ResourceCost cost_limit = ResourceCostMax)
{
while (count_limit > 0 && cost_limit > 0)
{
if (auto [request, _] = root_node->dequeueRequest(); request)
{
count_limit--;
cost_limit -= request->cost;
handle(static_cast<Request *>(request));
}
else
{
break;
}
}
}
void handle(Request * request)
{
consumed_cost[request->name] += request->cost;
delete request;
}
void consumed(const String & name, ResourceCost value, ResourceCost error = 0)
{
EXPECT_TRUE(consumed_cost[name] >= value - error);
EXPECT_TRUE(consumed_cost[name] <= value + error);
consumed_cost[name] -= value;
}
void processEvents()
{
while (event_queue.tryProcess()) {}
}
private:
EventQueue event_queue;
SchedulerNodePtr root_node;
std::unordered_map<String, ResourceCost> consumed_cost;
};
template <class TManager>
struct ResourceTestManager : public ResourceTestBase
{
ResourceManagerPtr manager;
std::vector<ThreadFromGlobalPool> threads;
std::barrier<> busy_period;
struct Guard : public ResourceGuard
{
ResourceTestManager & t;
Guard(ResourceTestManager & t_, ResourceLink link_, ResourceCost cost)
: ResourceGuard(link_, cost, PostponeLocking)
, t(t_)
{
t.onEnqueue(link);
lock();
t.onExecute(link);
}
};
struct TItem
{
std::atomic<Int64> enqueued = 0; // number of enqueued requests
std::atomic<Int64> left = 0; // number of requests left to be executed
};
struct ResourceQueueHash
{
size_t operator()(const ResourceLink & link) const
{
return std::hash<ISchedulerQueue*>()(link.queue);
}
};
std::mutex link_data_mutex;
std::unordered_map<ResourceLink, TItem, ResourceQueueHash> link_data;
explicit ResourceTestManager(size_t thread_count = 1)
: manager(new TManager)
, busy_period(thread_count)
{}
~ResourceTestManager()
{
for (auto & thread : threads)
thread.join();
}
void update(const String & xml)
{
std::istringstream stream(xml); // STYLE_CHECK_ALLOW_STD_STRING_STREAM
Poco::AutoPtr config{new Poco::Util::XMLConfiguration(stream)};
manager->updateConfiguration(*config);
}
auto & getLinkData(ResourceLink link)
{
std::unique_lock lock{link_data_mutex};
return link_data[link];
}
// Use at least two threads for each queue to avoid queue being deactivated:
// while the first request is executing, the second request is in queue - holding it active.
// use onEnqueue() and onExecute() functions for this purpose.
void onEnqueue(ResourceLink link)
{
getLinkData(link).enqueued.fetch_add(1, std::memory_order_relaxed);
}
void onExecute(ResourceLink link)
{
auto & data = getLinkData(link);
Int64 left = data.left.fetch_sub(1, std::memory_order_relaxed) - 1;
Int64 enqueued = data.enqueued.fetch_sub(1, std::memory_order_relaxed) - 1;
while (left > 0 && enqueued <= 0) // Ensure at least one thread has already enqueued itself (or there are no more requests)
{
std::this_thread::yield();
left = data.left.load();
enqueued = data.enqueued.load();
}
}
// This is required for a proper busy period start, i.e. for everyone to be seen by the scheduler as having appeared at the same time:
// - resource is blocked with queries by leader thread;
// - leader thread notifies followers to enqueue their requests;
// - leader thread unblocks resource;
// - busy period begins.
// NOTE: actually leader's request(s) make their own small busy period.
void blockResource(ResourceLink link)
{
ResourceGuard g(link, 1, ResourceGuard::PostponeLocking);
g.lock();
// NOTE: at this point we assume resource to be blocked by single request (<max_requests>1</max_requests>)
busy_period.arrive_and_wait(); // (1) notify all followers that resource is blocked
busy_period.arrive_and_wait(); // (2) wait all followers to enqueue their requests
}
void startBusyPeriod(ResourceLink link, ResourceCost cost, Int64 total_requests)
{
getLinkData(link).left += total_requests + 1;
busy_period.arrive_and_wait(); // (1) wait leader to block resource
ResourceGuard g(link, cost, ResourceGuard::PostponeLocking);
onEnqueue(link);
busy_period.arrive_and_wait(); // (2) notify leader to unblock
g.lock();
onExecute(link);
}
};
}

View File

@ -0,0 +1,122 @@
#include <gtest/gtest.h>
#include <IO/Resource/tests/ResourceTest.h>
#include <IO/Resource/PriorityPolicy.h>
using namespace DB;
using ResourceTest = ResourceTestClass;
TEST(IOResourcePriorityPolicy, Factory)
{
ResourceTest t;
Poco::AutoPtr cfg = new Poco::Util::XMLConfiguration();
SchedulerNodePtr prio = SchedulerNodeFactory::instance().get("priority", /* event_queue = */ nullptr, *cfg, "");
EXPECT_TRUE(dynamic_cast<PriorityPolicy *>(prio.get()) != nullptr);
}
TEST(IOResourcePriorityPolicy, Priorities)
{
ResourceTest t;
t.add<PriorityPolicy>("/");
t.add<FifoQueue>("/A", "<priority>1</priority>");
t.add<FifoQueue>("/B", "<priority>2</priority>");
t.add<FifoQueue>("/C", "<priority>3</priority>");
t.enqueue("/A", {10, 10, 10});
t.enqueue("/B", {10, 10, 10});
t.enqueue("/C", {10, 10, 10});
t.dequeue(2);
t.consumed("A", 0);
t.consumed("B", 0);
t.consumed("C", 20);
t.dequeue(2);
t.consumed("A", 0);
t.consumed("B", 10);
t.consumed("C", 10);
t.dequeue(2);
t.consumed("A", 0);
t.consumed("B", 20);
t.consumed("C", 0);
t.dequeue();
t.consumed("A", 30);
t.consumed("B", 0);
t.consumed("C", 0);
}
TEST(IOResourcePriorityPolicy, Activation)
{
ResourceTest t;
t.add<PriorityPolicy>("/");
t.add<FifoQueue>("/A", "<priority>1</priority>");
t.add<FifoQueue>("/B", "<priority>2</priority>");
t.add<FifoQueue>("/C", "<priority>3</priority>");
t.enqueue("/A", {10, 10, 10, 10, 10, 10});
t.enqueue("/B", {10});
t.enqueue("/C", {10, 10});
t.dequeue(3);
t.consumed("A", 0);
t.consumed("B", 10);
t.consumed("C", 20);
t.dequeue(2);
t.consumed("A", 20);
t.consumed("B", 0);
t.consumed("C", 0);
t.enqueue("/B", {10, 10, 10});
t.dequeue(2);
t.consumed("A", 0);
t.consumed("B", 20);
t.consumed("C", 0);
t.enqueue("/C", {10, 10});
t.dequeue(3);
t.consumed("A", 0);
t.consumed("B", 10);
t.consumed("C", 20);
t.dequeue(2);
t.consumed("A", 20);
t.consumed("B", 0);
t.consumed("C", 0);
}
TEST(IOResourcePriorityPolicy, SinglePriority)
{
ResourceTest t;
t.add<PriorityPolicy>("/");
t.add<FifoQueue>("/A");
for (int i = 0; i < 3; i++)
{
t.enqueue("/A", {10, 10});
t.dequeue(1);
t.consumed("A", 10);
for (int j = 0; j < 3; j++)
{
t.enqueue("/A", {10, 10, 10});
t.dequeue(1);
t.consumed("A", 10);
t.dequeue(1);
t.consumed("A", 10);
t.dequeue(1);
t.consumed("A", 10);
}
t.dequeue(1);
t.consumed("A", 10);
}
}

View File

@ -0,0 +1,103 @@
#include <gtest/gtest.h>
#include <IO/Resource/tests/ResourceTest.h>
#include <IO/Resource/StaticResourceManager.h>
#include <Poco/Util/XMLConfiguration.h>
using namespace DB;
using ResourceTest = ResourceTestManager<StaticResourceManager>;
using TestGuard = ResourceTest::Guard;
TEST(IOResourceStaticResourceManager, Smoke)
{
ResourceTest t;
t.update(R"CONFIG(
<clickhouse>
<resources>
<res1>
<node path="/"> <type>inflight_limit</type><max_requests>10</max_requests></node>
<node path="/prio"> <type>priority</type></node>
<node path="/prio/A"></node>
<node path="/prio/B"><priority>1</priority></node>
</res1>
</resources>
<classifiers>
<A><res1>/prio/A</res1></A>
<B><res1>/prio/B</res1></B>
</classifiers>
</clickhouse>
)CONFIG");
ClassifierPtr ca = t.manager->acquire("A");
ClassifierPtr cb = t.manager->acquire("B");
for (int i = 0; i < 10; i++)
{
ResourceGuard ga(ca->get("res1"));
ResourceGuard gb(cb->get("res1"));
}
}
TEST(IOResourceStaticResourceManager, Prioritization)
{
constexpr size_t threads_per_queue = 2;
int requests_per_thread = 100;
ResourceTest t(4 * threads_per_queue + 1);
t.update(R"CONFIG(
<clickhouse>
<resources>
<res1>
<node path="/"> <type>inflight_limit</type><max_requests>1</max_requests></node>
<node path="/prio"> <type>priority</type></node>
<node path="/prio/A"> <priority>-1</priority></node>
<node path="/prio/B"> <priority>1</priority></node>
<node path="/prio/C"> </node>
<node path="/prio/D"> </node>
<node path="/prio/leader"></node>
</res1>
</resources>
<classifiers>
<A><res1>/prio/A</res1></A>
<B><res1>/prio/B</res1></B>
<C><res1>/prio/C</res1></C>
<D><res1>/prio/D</res1></D>
<leader><res1>/prio/leader</res1></leader>
</classifiers>
</clickhouse>
)CONFIG");
std::optional<Int64> last_priority;
auto check = [&] (Int64 priority)
{
// Lock is not required here because this is called during request execution and we have max_requests = 1
if (last_priority)
EXPECT_TRUE(priority <= *last_priority); // Should be true if every queue arrived at the same time at busy period start
last_priority = priority;
};
for (String name : {"A", "B", "C", "D"})
{
for (int thr = 0; thr < threads_per_queue; thr++)
{
t.threads.emplace_back([&, name]
{
ClassifierPtr c = t.manager->acquire(name);
ResourceLink link = c->get("res1");
t.startBusyPeriod(link, 1, requests_per_thread);
for (int req = 0; req < requests_per_thread; req++)
{
TestGuard g(t, link, 1);
check(link.queue->info.priority);
}
});
}
}
ClassifierPtr c = t.manager->acquire("leader");
ResourceLink link = c->get("res1");
t.blockResource(link);
}

View File

@ -0,0 +1,113 @@
#include <gtest/gtest.h>
#include <IO/SchedulerRoot.h>
#include <IO/Resource/tests/ResourceTest.h>
#include <future>
using namespace DB;
struct ResourceTest : public ResourceTestBase
{
SchedulerRoot scheduler;
ResourceTest()
{
scheduler.start();
}
~ResourceTest()
{
scheduler.stop(true);
}
};
struct ResourceHolder
{
ResourceTest & t;
SchedulerNodePtr root_node;
explicit ResourceHolder(ResourceTest & t_)
: t(t_)
{}
~ResourceHolder()
{
unregisterResource();
}
template <class TClass>
TClass * add(const String & path, const String & xml = {})
{
return ResourceTest::add<TClass>(t.scheduler.event_queue, root_node, path, xml);
}
ResourceLink addQueue(const String & path, const String & xml = {})
{
return {.queue = static_cast<ISchedulerQueue *>(ResourceTest::add<FifoQueue>(t.scheduler.event_queue, root_node, path, xml))};
}
void registerResource()
{
std::promise<void> p;
auto f = p.get_future();
t.scheduler.event_queue->enqueue([this, &p]
{
t.scheduler.attachChild(root_node);
p.set_value();
});
f.get();
}
void unregisterResource()
{
std::promise<void> p;
auto f = p.get_future();
t.scheduler.event_queue->enqueue([this, &p]
{
t.scheduler.removeChild(root_node.get());
p.set_value();
});
f.get();
}
};
TEST(IOSchedulerRoot, Smoke)
{
ResourceTest t;
ResourceHolder r1(t);
auto * fc1 = r1.add<ConstraintTest>("/", "<max_requests>1</max_requests>");
r1.add<PriorityPolicy>("/prio");
auto a = r1.addQueue("/prio/A", "<priority>1</priority>");
auto b = r1.addQueue("/prio/B", "<priority>2</priority>");
r1.registerResource();
ResourceHolder r2(t);
auto * fc2 = r2.add<ConstraintTest>("/", "<max_requests>1</max_requests>");
r2.add<PriorityPolicy>("/prio");
auto c = r2.addQueue("/prio/C", "<priority>-1</priority>");
auto d = r2.addQueue("/prio/D", "<priority>-2</priority>");
r2.registerResource();
{
ResourceGuard rg(a);
EXPECT_TRUE(fc1->requests.contains(&rg.request));
}
{
ResourceGuard rg(b);
EXPECT_TRUE(fc1->requests.contains(&rg.request));
}
{
ResourceGuard rg(c);
EXPECT_TRUE(fc2->requests.contains(&rg.request));
}
{
ResourceGuard rg(d);
EXPECT_TRUE(fc2->requests.contains(&rg.request));
}
}

src/IO/ResourceGuard.h

@ -0,0 +1,93 @@
#pragma once
#include <base/types.h>
#include <IO/ResourceRequest.h>
#include <IO/ISchedulerQueue.h>
#include <IO/ISchedulerConstraint.h>
#include <future>
namespace DB
{
/*
* Scoped resource guard.
* Waits for the resource to become available in the constructor and releases it in the destructor
*/
class ResourceGuard
{
public:
enum ResourceGuardCtor
{
LockStraightAway, /// Lock inside constructor (default)
PostponeLocking /// Don't lock in constructor, but during later `lock()` call
};
struct Request : public ResourceRequest
{
/// Promise to be set on request execution
std::promise<void> dequeued;
explicit Request(ResourceCost cost_ = 1)
: ResourceRequest(cost_)
{}
void execute() override
{
// This function is executed in the scheduler thread and wakes the thread that issued this `request` (via ResourceGuard).
// That thread then continues execution and performs the real consumption of the requested resource synchronously.
dequeued.set_value();
}
};
/// Creates a pending request for the resource; blocks until the resource becomes available (unless `PostponeLocking` is used)
explicit ResourceGuard(ResourceLink link_, ResourceCost cost = 1, ResourceGuardCtor ctor = LockStraightAway)
: link(link_)
, request(cost)
{
if (link.queue)
{
dequeued_future = request.dequeued.get_future();
link.queue->enqueueRequest(&request);
if (ctor == LockStraightAway)
lock();
}
}
~ResourceGuard()
{
unlock();
}
/// Blocks until resource is available
void lock()
{
if (link.queue)
dequeued_future.get();
}
/// Report that request execution has finished
void unlock()
{
if (link.queue)
{
assert(!dequeued_future.valid()); // unlock must be called only after lock()
if (request.constraint)
request.constraint->finishRequest(&request);
}
}
/// Mark the request as unsuccessful; by default the request is considered successful
void setFailure()
{
request.successful = false;
}
public:
ResourceLink link;
Request request;
std::future<void> dequeued_future;
};
}
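A minimal usage sketch, not part of this diff: `read_link`, `bytes` and `readFromRemote` are illustrative names, the pattern shows postponed locking and failure reporting as described by the comments above.
// Assumes `read_link` is a ResourceLink obtained from a classifier for some IO resource.
void readWithScheduling(DB::ResourceLink read_link, size_t bytes)
{
    DB::ResourceGuard guard(read_link, bytes, DB::ResourceGuard::PostponeLocking);
    // ... prepare buffers, open connections, etc. ...
    guard.lock(); // block until the scheduler grants access to the resource
    try
    {
        readFromRemote(bytes); // the actual consumption happens while the guard is held
    }
    catch (...)
    {
        guard.setFailure(); // mark the request as unsuccessful for introspection
        throw;
    }
    // The destructor calls unlock(), which notifies the constraint that consumption has finished.
}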


@ -0,0 +1,55 @@
#pragma once
#include <Common/ErrorCodes.h>
#include <Common/Exception.h>
#include <IO/IResourceManager.h>
#include <boost/noncopyable.hpp>
#include <memory>
#include <mutex>
#include <unordered_map>
namespace DB
{
namespace ErrorCodes
{
extern const int INVALID_SCHEDULER_NODE;
}
class ResourceManagerFactory : private boost::noncopyable
{
public:
static ResourceManagerFactory & instance()
{
static ResourceManagerFactory ret;
return ret;
}
ResourceManagerPtr get(const String & name)
{
std::lock_guard lock{mutex};
if (auto iter = methods.find(name); iter != methods.end())
return iter->second();
throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Unknown scheduler node type: {}", name);
}
template <class TDerived>
void registerMethod(const String & name)
{
std::lock_guard lock{mutex};
methods[name] = [] ()
{
return std::make_shared<TDerived>();
};
}
private:
std::mutex mutex;
using Method = std::function<ResourceManagerPtr()>;
std::unordered_map<String, Method> methods;
};
}
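A hedged sketch of how a manager implementation such as StaticResourceManager might be made available by name; the "static" name and both call sites are assumptions for illustration, not shown in this diff. Unknown names make get() throw INVALID_SCHEDULER_NODE.
// Registration, typically done once at startup:
DB::ResourceManagerFactory::instance().registerMethod<DB::StaticResourceManager>("static");
// Lookup by the name taken from server configuration:
DB::ResourceManagerPtr manager = DB::ResourceManagerFactory::instance().get("static");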

src/IO/ResourceRequest.h

@ -0,0 +1,92 @@
#pragma once
#include <base/types.h>
#include <limits>
namespace DB
{
// Forward declarations
class ISchedulerQueue;
class ISchedulerNode;
class ISchedulerConstraint;
/// Cost in terms of used resource (e.g. bytes for network IO)
using ResourceCost = Int64;
constexpr ResourceCost ResourceCostMax = std::numeric_limits<int>::max();
/// Internal identifier of a resource (for arrays; unique per scheduler)
using ResourceIdx = size_t;
constexpr ResourceIdx ResourceIdxNotSet = ResourceIdx(-1);
/// Timestamps (nanoseconds since epoch)
using ResourceNs = UInt64;
/*
* Info required for resource consumption.
*/
struct ResourceLink
{
ISchedulerQueue * queue = nullptr;
bool operator==(const ResourceLink &) const = default;
};
/*
* Request for a resource consumption. The main moving part of the scheduling subsystem.
* Resource requests processing workflow:
*
* ----1=2222222222222=3=4=555555555555555=6-----> time
* ^ ^ ^ ^ ^ ^
* | | | | | |
* enqueue wait dequeue execute consume finish
*
* 1) Request is enqueued using ISchedulerQueue::enqueueRequest().
* 2) Request competes with others for access to a resource; effectively just waiting in a queue.
* 3) Scheduler calls ISchedulerNode::dequeueRequest() that returns the request.
* 4) Callback ResourceRequest::execute() is called to provide access to the resource.
* 5) The resource consumption is happening outside of the scheduling subsystem.
* 6) request->constraint->finishRequest() is called when consumption is finished.
*
* Steps (5) and (6) can be omitted if constraint is not used by the resource.
*
* Request can be created on stack or heap.
* Request ownership is done outside of the scheduling subsystem.
* After (6) request can be destructed safely.
*
* Request cancelling is not supported yet.
*/
class ResourceRequest
{
public:
/// Cost of request execution; should be filled before request enqueueing.
/// NOTE: If cost is not known in advance, a credit model can be used:
/// NOTE: enqueue the first request with cost 1 and compensate for the actual cost in subsequent requests.
ResourceCost cost;
/// Request outcome
/// Should be filled during resource consumption
bool successful = true;
/// Scheduler node to be notified on consumption finish
/// Auto-filled during request enqueue/dequeue
ISchedulerConstraint * constraint = nullptr;
/// Timestamps for introspection
ResourceNs enqueue_ns = 0;
ResourceNs execute_ns = 0;
ResourceNs finish_ns = 0;
explicit ResourceRequest(ResourceCost cost_ = 1)
: cost(cost_)
{}
virtual ~ResourceRequest() = default;
/// Callback to trigger resource consumption.
/// IMPORTANT: it is called from the scheduler thread and must be fast:
/// it should only trigger the start of consumption, not do the consumption itself
/// (e.g. set an std::promise or create a job in a thread pool).
virtual void execute() = 0;
};
}
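To make the six-step workflow above concrete, here is a hedged sketch of a request whose execute() only signals the issuing thread, essentially what ResourceGuard does; `queue` is assumed to be an ISchedulerQueue pointer taken from a ResourceLink registered in a running scheduler, and the names are illustrative.
// Step (4) only wakes the issuing thread; consumption (5) happens there synchronously.
struct PromiseRequest : DB::ResourceRequest
{
    std::promise<void> granted;
    explicit PromiseRequest(DB::ResourceCost cost_) : DB::ResourceRequest(cost_) {}
    void execute() override { granted.set_value(); }
};
void consumeWithRawRequest(DB::ISchedulerQueue * queue)
{
    PromiseRequest request(/* cost = */ 42);          // (1) fill cost before enqueueing
    queue->enqueueRequest(&request);                  // (1)-(2) wait in the queue
    request.granted.get_future().get();               // (3)-(4) scheduler dequeued us and called execute()
    // (5) consume the resource here
    if (request.constraint)                           // (6) notify the constraint, if the resource uses one
        request.constraint->finishRequest(&request);
}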


@ -0,0 +1,57 @@
#pragma once
#include <Common/ErrorCodes.h>
#include <Common/Exception.h>
#include <IO/ISchedulerNode.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <boost/noncopyable.hpp>
#include <memory>
#include <mutex>
#include <unordered_map>
namespace DB
{
namespace ErrorCodes
{
extern const int INVALID_SCHEDULER_NODE;
}
class SchedulerNodeFactory : private boost::noncopyable
{
public:
static SchedulerNodeFactory & instance()
{
static SchedulerNodeFactory ret;
return ret;
}
SchedulerNodePtr get(const String & name, EventQueue * event_queue, const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
{
std::lock_guard lock{mutex};
if (auto iter = methods.find(name); iter != methods.end())
return iter->second(event_queue, config, config_prefix);
throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Unknown scheduler node type: {}", name);
}
template <class TDerived>
void registerMethod(const String & name)
{
std::lock_guard lock{mutex};
methods[name] = [] (EventQueue * event_queue, const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
{
return std::make_shared<TDerived>(event_queue, config, config_prefix);
};
}
private:
std::mutex mutex;
using Method = std::function<SchedulerNodePtr(EventQueue * event_queue, const Poco::Util::AbstractConfiguration & config, const String & config_prefix)>;
std::unordered_map<String, Method> methods;
};
}
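A hedged sketch of how node types referenced from configuration (e.g. <type>priority</type> in the tests above) could be registered and resolved; the registration site and the "fifo" default name are assumptions, not part of this diff.
// Registration, typically done once at startup:
DB::SchedulerNodeFactory::instance().registerMethod<DB::PriorityPolicy>("priority");
DB::SchedulerNodeFactory::instance().registerMethod<DB::FifoQueue>("fifo");
// Building a node while walking the config hierarchy:
DB::SchedulerNodePtr makeNode(
    DB::EventQueue * event_queue,
    const Poco::Util::AbstractConfiguration & config,
    const std::string & config_prefix)
{
    return DB::SchedulerNodeFactory::instance().get(
        config.getString(config_prefix + ".type", "fifo"), event_queue, config, config_prefix);
}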

src/IO/SchedulerRoot.h

@ -0,0 +1,250 @@
#pragma once
#include <base/defines.h>
#include <Common/Stopwatch.h>
#include <Common/ThreadPool.h>
#include <IO/ISchedulerNode.h>
#include <IO/ISchedulerConstraint.h>
#include <Poco/Util/XMLConfiguration.h>
#include <unordered_map>
#include <map>
#include <memory>
#include <atomic>
namespace DB
{
namespace ErrorCodes
{
extern const int INVALID_SCHEDULER_NODE;
}
/*
* Resource scheduler root node with a dedicated thread.
* Immediate children correspond to different resources.
*/
class SchedulerRoot : public ISchedulerNode
{
private:
struct TResource
{
SchedulerNodePtr root;
// Intrusive cyclic list of active resources
TResource * next = nullptr;
TResource * prev = nullptr;
explicit TResource(const SchedulerNodePtr & root_)
: root(root_)
{
root->info.parent.ptr = this;
}
// Get pointer stored by ctor in info
static TResource * get(SchedulerNodeInfo & info)
{
return reinterpret_cast<TResource *>(info.parent.ptr);
}
};
public:
SchedulerRoot()
: ISchedulerNode(&events)
{}
~SchedulerRoot() override
{
stop();
}
/// Runs a separate scheduler thread
void start()
{
if (!scheduler.joinable())
scheduler = ThreadFromGlobalPool([this] { schedulerThread(); });
}
/// Joins the scheduler thread and executes every pending request iff graceful
void stop(bool graceful = true)
{
if (scheduler.joinable())
{
stop_flag.store(true);
events.enqueue([]{}); // just to wake up thread
scheduler.join();
if (graceful)
{
// Do the same cycle as schedulerThread() but never block, just exit instead
bool has_work = true;
while (has_work)
{
auto [request, _] = dequeueRequest();
if (request)
execute(request);
else
has_work = false;
while (events.tryProcess())
has_work = true;
}
}
}
}
bool equals(ISchedulerNode * other) override
{
if (auto * o = dynamic_cast<SchedulerRoot *>(other))
return true;
return false;
}
void attachChild(const SchedulerNodePtr & child) override
{
// Take ownership
assert(child->parent == nullptr);
if (auto [it, inserted] = children.emplace(child.get(), child); !inserted)
throw Exception(
ErrorCodes::INVALID_SCHEDULER_NODE,
"Can't add the same scheduler node twice");
// Attach
child->setParent(this);
// Activate child if required
if (child->isActive())
activateChild(child.get());
}
void removeChild(ISchedulerNode * child) override
{
if (auto iter = children.find(child); iter != children.end())
{
SchedulerNodePtr removed = iter->second.root;
// Deactivate if required
deactivate(&iter->second);
// Detach
removed->setParent(nullptr);
// Remove ownership
children.erase(iter);
}
}
ISchedulerNode * getChild(const String &) override
{
abort(); // scheduler is allowed to have multiple children with the same name
}
std::pair<ResourceRequest *, bool> dequeueRequest() override
{
if (current == nullptr) // No active resources
return {nullptr, false};
// Dequeue request from current resource
auto [request, resource_active] = current->root->dequeueRequest();
assert(request != nullptr);
// Deactivate resource if required
if (!resource_active)
deactivate(current);
else
current = current->next; // Just move round-robin pointer
return {request, current != nullptr};
}
bool isActive() override
{
return current != nullptr;
}
void activateChild(ISchedulerNode * child) override
{
activate(TResource::get(child->info));
}
void setParent(ISchedulerNode *) override
{
abort(); // scheduler must be the root and this function should not be called
}
private:
void activate(TResource * value)
{
assert(value->next == nullptr && value->prev == nullptr);
if (current == nullptr) // No active children
{
current = value;
value->prev = value;
value->next = value;
}
else
{
current->prev->next = value;
value->prev = current->prev;
current->prev = value;
value->next = current;
}
}
void deactivate(TResource * value)
{
if (value->next == nullptr)
return; // Already deactivated
assert(current != nullptr);
if (current == value)
{
if (current->next == current) // We are going to remove the last active child
{
value->next = nullptr;
value->prev = nullptr;
current = nullptr;
return;
}
else // Just move current to next to avoid invalidation
current = current->next;
}
value->prev->next = value->next;
value->next->prev = value->prev;
value->prev = nullptr;
value->next = nullptr;
}
private:
void schedulerThread()
{
while (!stop_flag.load())
{
// Dequeue and execute single request
auto [request, _] = dequeueRequest();
if (request)
execute(request);
else // No more requests -- block until any event happens
events.process();
// Process all events before dequeuing to ensure fair competition
while (events.tryProcess()) {}
}
}
void execute(ResourceRequest * request)
{
request->execute_ns = clock_gettime_ns();
request->execute();
}
private:
TResource * current = nullptr; // round-robin pointer
std::unordered_map<ISchedulerNode *, TResource> children; // resources by pointer
std::atomic<bool> stop_flag = false;
EventQueue events;
ThreadFromGlobalPool scheduler;
};
}
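A minimal lifecycle sketch, mirroring what ResourceHolder does in the unit test above; building the per-resource hierarchy itself is elided, `resource_root` is assumed to be its root node.
void runResource(DB::SchedulerRoot & scheduler, DB::SchedulerNodePtr resource_root)
{
    scheduler.start(); // spawn the dedicated scheduler thread

    // Structural changes must happen in the scheduler thread, so they are posted
    // through the event queue and waited for, like ResourceHolder::registerResource().
    std::promise<void> attached;
    scheduler.event_queue->enqueue([&] { scheduler.attachChild(resource_root); attached.set_value(); });
    attached.get_future().get();

    // ... enqueue ResourceRequests into leaf queues under `resource_root` ...

    scheduler.stop(true); // graceful: execute pending requests, then join the thread
}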


@ -72,7 +72,7 @@ WriteBufferFromS3::WriteBufferFromS3(
std::shared_ptr<const Aws::S3::S3Client> client_ptr_,
const String & bucket_,
const String & key_,
const S3Settings::RequestSettings & request_settings_,
const S3Settings::RequestSettings & request_settings,
std::optional<std::map<String, String>> object_metadata_,
size_t buffer_size_,
ThreadPoolCallbackRunner<void> schedule_,
@ -80,10 +80,12 @@ WriteBufferFromS3::WriteBufferFromS3(
: BufferWithOwnMemory<WriteBuffer>(buffer_size_, nullptr, 0)
, bucket(bucket_)
, key(key_)
, request_settings(request_settings_)
, settings(request_settings.getUploadSettings())
, check_objects_after_upload(request_settings.check_objects_after_upload)
, max_unexpected_write_error_retries(request_settings.max_unexpected_write_error_retries)
, client_ptr(std::move(client_ptr_))
, object_metadata(std::move(object_metadata_))
, upload_part_size(request_settings_.min_upload_part_size)
, upload_part_size(settings.min_upload_part_size)
, schedule(std::move(schedule_))
, write_settings(write_settings_)
{
@ -108,9 +110,10 @@ void WriteBufferFromS3::nextImpl()
write_settings.remote_throttler->add(offset());
/// Data size exceeds singlepart upload threshold, need to use multipart upload.
if (multipart_upload_id.empty() && last_part_size > request_settings.max_single_part_upload_size)
if (multipart_upload_id.empty() && last_part_size > settings.max_single_part_upload_size)
createMultipartUpload();
chassert(upload_part_size > 0);
if (!multipart_upload_id.empty() && last_part_size > upload_part_size)
{
writePart();
@ -175,7 +178,7 @@ void WriteBufferFromS3::finalizeImpl()
if (!multipart_upload_id.empty())
completeMultipartUpload();
if (request_settings.check_objects_after_upload)
if (check_objects_after_upload)
{
LOG_TRACE(log, "Checking object {} exists after upload", key);
@ -300,15 +303,15 @@ void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & re
{
/// Increase part number.
++part_number;
if (!multipart_upload_id.empty() && (part_number > request_settings.max_part_number))
if (!multipart_upload_id.empty() && (part_number > settings.max_part_number))
{
throw Exception(
ErrorCodes::INVALID_CONFIG_PARAMETER,
"Part number exceeded {} while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, "
"upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_part_upload_size = {}",
request_settings.max_part_number, count(), request_settings.min_upload_part_size, request_settings.max_upload_part_size,
request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold,
request_settings.max_single_part_upload_size);
settings.max_part_number, count(), settings.min_upload_part_size, settings.max_upload_part_size,
settings.upload_part_size_multiply_factor, settings.upload_part_size_multiply_parts_count_threshold,
settings.max_single_part_upload_size);
}
/// Setup request.
@ -323,10 +326,10 @@ void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & re
req.SetContentType("binary/octet-stream");
/// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less or equal than `max_part_number`).
if (!multipart_upload_id.empty() && (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0))
if (!multipart_upload_id.empty() && (part_number % settings.upload_part_size_multiply_parts_count_threshold == 0))
{
upload_part_size *= request_settings.upload_part_size_multiply_factor;
upload_part_size = std::min(upload_part_size, request_settings.max_upload_part_size);
upload_part_size *= settings.upload_part_size_multiply_factor;
upload_part_size = std::min(upload_part_size, settings.max_upload_part_size);
}
}
@ -371,7 +374,7 @@ void WriteBufferFromS3::completeMultipartUpload()
req.SetMultipartUpload(multipart_upload);
size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL);
size_t max_retry = std::max(max_unexpected_write_error_retries, 1UL);
for (size_t i = 0; i < max_retry; ++i)
{
ProfileEvents::increment(ProfileEvents::S3CompleteMultipartUpload);
@ -477,7 +480,7 @@ void WriteBufferFromS3::fillPutRequest(Aws::S3::Model::PutObjectRequest & req)
void WriteBufferFromS3::processPutRequest(const PutObjectTask & task)
{
size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL);
size_t max_retry = std::max(max_unexpected_write_error_retries, 1UL);
for (size_t i = 0; i < max_retry; ++i)
{
ProfileEvents::increment(ProfileEvents::S3PutObject);
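To make the growth rule above concrete (the numbers below are purely illustrative and are not taken from this diff): with settings.min_upload_part_size = 16 MiB, upload_part_size_multiply_factor = 2, upload_part_size_multiply_parts_count_threshold = 500 and max_upload_part_size = 5 GiB, parts 1-500 are uploaded at 16 MiB each, parts 501-1000 at 32 MiB, parts 1001-1500 at 64 MiB, and so on, doubling every 500 parts until the 5 GiB cap is reached. This keeps part_number well below settings.max_part_number even for multi-terabyte objects, which is exactly the condition the exception above enforces.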


@ -50,7 +50,7 @@ public:
std::shared_ptr<const Aws::S3::S3Client> client_ptr_,
const String & bucket_,
const String & key_,
const S3Settings::RequestSettings & request_settings_,
const S3Settings::RequestSettings & request_settings,
std::optional<std::map<String, String>> object_metadata_ = std::nullopt,
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
ThreadPoolCallbackRunner<void> schedule_ = {},
@ -88,7 +88,9 @@ private:
const String bucket;
const String key;
const S3Settings::RequestSettings request_settings;
const S3Settings::RequestSettings::PartUploadSettings settings;
const bool check_objects_after_upload = false;
const size_t max_unexpected_write_error_retries = 4;
const std::shared_ptr<const Aws::S3::S3Client> client_ptr;
const std::optional<std::map<String, String>> object_metadata;


@ -40,13 +40,18 @@ BlockIO InterpreterCreateRoleQuery::execute()
else
getContext()->checkAccess(AccessType::CREATE_ROLE);
if (!query.cluster.empty())
return executeDDLQueryOnCluster(query_ptr, getContext());
std::optional<SettingsProfileElements> settings_from_query;
if (query.settings)
{
settings_from_query = SettingsProfileElements{*query.settings, access_control};
if (!query.attach)
getContext()->checkSettingsConstraints(*settings_from_query);
}
if (!query.cluster.empty())
return executeDDLQueryOnCluster(query_ptr, getContext());
if (query.alter)
{
auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr


@ -48,16 +48,21 @@ BlockIO InterpreterCreateSettingsProfileQuery::execute()
else
getContext()->checkAccess(AccessType::CREATE_SETTINGS_PROFILE);
std::optional<SettingsProfileElements> settings_from_query;
if (query.settings)
{
settings_from_query = SettingsProfileElements{*query.settings, access_control};
if (!query.attach)
getContext()->checkSettingsConstraints(*settings_from_query);
}
if (!query.cluster.empty())
{
query.replaceCurrentUserTag(getContext()->getUserName());
return executeDDLQueryOnCluster(query_ptr, getContext());
}
std::optional<SettingsProfileElements> settings_from_query;
if (query.settings)
settings_from_query = SettingsProfileElements{*query.settings, access_control};
std::optional<RolesOrUsersSet> roles_from_query;
if (query.to_roles)
roles_from_query = RolesOrUsersSet{*query.to_roles, access_control, getContext()->getUserID()};

Some files were not shown because too many files have changed in this diff.