Merge remote-tracking branch 'origin/master' into clickhouse-help

This commit is contained in:
Yatsishin Ilya 2023-02-07 15:46:59 +00:00
commit 1baa15d603
192 changed files with 9781 additions and 943 deletions

View File

@ -512,6 +512,75 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
InstallPackagesTestRelease:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (amd64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
InstallPackagesTestAarch64:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (arm64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
########################### FUNCTIONAL STATELESS TESTS #######################################
##############################################################################################

View File

@ -946,6 +946,75 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
InstallPackagesTestRelease:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (amd64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
InstallPackagesTestAarch64:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (arm64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
########################### FUNCTIONAL STATELESS TESTS #######################################
##############################################################################################

View File

@ -984,6 +984,75 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
InstallPackagesTestRelease:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (amd64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
InstallPackagesTestAarch64:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (arm64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
########################### FUNCTIONAL STATELESS TESTS #######################################
##############################################################################################

View File

@ -604,6 +604,75 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
InstallPackagesTestRelease:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (amd64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
InstallPackagesTestAarch64:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (arm64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
########################### FUNCTIONAL STATELESS TESTS #######################################
##############################################################################################

2
contrib/azure vendored

@ -1 +1 @@
Subproject commit e4fcdfc81e337e589ce231a452dcc280fcbb3f99
Subproject commit 096049bf24fffafcaccc132b9367694532716731

View File

@ -21,5 +21,3 @@ RUN yarn config set registry https://registry.npmjs.org \
COPY run.sh /run.sh
ENTRYPOINT ["/run.sh"]
CMD ["yarn", "build"]

View File

@ -25,7 +25,8 @@ done
sed -i '/onBrokenMarkdownLinks:/ s/ignore/error/g' docusaurus.config.js
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
export CI=true
yarn install
exec yarn build "$@"
fi

View File

@ -134,6 +134,14 @@
"name": "clickhouse/keeper-jepsen-test",
"dependent": []
},
"docker/test/install/deb": {
"name": "clickhouse/install-deb-test",
"dependent": []
},
"docker/test/install/rpm": {
"name": "clickhouse/install-rpm-test",
"dependent": []
},
"docker/docs/builder": {
"name": "clickhouse/docs-builder",
"dependent": [

View File

@ -231,6 +231,7 @@ function run_tests
--hung-check
--fast-tests-only
--no-random-settings
--no-random-merge-tree-settings
--no-long
--testname
--shard

View File

@ -0,0 +1,64 @@
FROM ubuntu:22.04
# The Dockerfile is nicely borrowed from
# https://github.com/lionelnicolas/docker-ubuntu-systemd/blob/83aa3249146f5df264fe45353f79fc76eb1e42d7/Dockerfile
ENV \
DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
container=docker \
init=/lib/systemd/systemd
# install systemd packages
RUN apt-get update && \
apt-get install -y --no-install-recommends \
systemd \
&& \
apt-get clean && \
rm -rf /var/lib/apt/lists
# configure systemd
# remove systemd 'wants' triggers
# remove everything except tmpfiles setup in sysinit target
# remove UTMP updater service
# disable /tmp mount
# fix missing BPF firewall support warning
# just for cosmetics, fix "not-found" entries while using "systemctl --all"
RUN \
find \
/etc/systemd/system/*.wants/* \
/lib/systemd/system/multi-user.target.wants/* \
/lib/systemd/system/sockets.target.wants/*initctl* \
! -type d \
-delete && \
find \
/lib/systemd/system/sysinit.target.wants \
! -type d \
! -name '*systemd-tmpfiles-setup*' \
-delete && \
find \
/lib/systemd \
-name systemd-update-utmp-runlevel.service \
-delete && \
rm -vf /usr/share/systemd/tmp.mount && \
sed -ri '/^IPAddressDeny/d' /lib/systemd/system/systemd-journald.service && \
for MATCH in \
plymouth-start.service \
plymouth-quit-wait.service \
syslog.socket \
syslog.service \
display-manager.service \
systemd-sysusers.service \
tmp.mount \
systemd-udevd.service \
; do \
grep -rn --binary-files=without-match ${MATCH} /lib/systemd/ | cut -d: -f1 | xargs sed -ri 's/(.*=.*)'${MATCH}'(.*)/\1\2/'; \
done && \
systemctl disable ondemand.service && \
systemctl set-default multi-user.target
VOLUME ["/run", "/run/lock"]
STOPSIGNAL SIGRTMIN+3
ENTRYPOINT ["/lib/systemd/systemd"]

View File

@ -0,0 +1,55 @@
FROM centos:8
# The Dockerfile is nicely borrowed from
# https://github.com/lionelnicolas/docker-ubuntu-systemd/blob/83aa3249146f5df264fe45353f79fc76eb1e42d7/Dockerfile
ENV \
LANG=C.UTF-8 \
container=docker \
init=/lib/systemd/systemd
# configure systemd
# remove systemd 'wants' triggers
# remove everything except tmpfiles setup in sysinit target
# remove UTMP updater service
# disable /tmp mount
# fix missing BPF firewall support warning
# just for cosmetics, fix "not-found" entries while using "systemctl --all"
RUN \
find \
/etc/systemd/system/*.wants/ \
/lib/systemd/system/multi-user.target.wants/ \
/lib/systemd/system/local-fs.target.wants/ \
/lib/systemd/system/sockets.target.wants/*initctl* \
! -type d \
-delete && \
find \
/lib/systemd/system/sysinit.target.wants \
! -type d \
! -name '*systemd-tmpfiles-setup*' \
-delete && \
find \
/lib/systemd \
-name systemd-update-utmp-runlevel.service \
-delete && \
rm -vf /usr/share/systemd/tmp.mount && \
sed -ri '/^IPAddressDeny/d' /lib/systemd/system/systemd-journald.service && \
for MATCH in \
plymouth-start.service \
plymouth-quit-wait.service \
syslog.socket \
syslog.service \
display-manager.service \
systemd-sysusers.service \
tmp.mount \
systemd-udevd.service \
; do \
grep -rn --binary-files=without-match ${MATCH} /lib/systemd/ | cut -d: -f1 | xargs sed -ri 's/(.*=.*)'${MATCH}'(.*)/\1\2/'; \
done && \
systemctl set-default multi-user.target
VOLUME ["/run", "/run/lock"]
STOPSIGNAL SIGRTMIN+3
ENTRYPOINT ["/lib/systemd/systemd"]

View File

@ -0,0 +1,226 @@
---
slug: /en/engines/table-engines/special/executable
sidebar_position: 40
sidebar_label: Executable
---
# Executable and ExecutablePool Table Engines
The `Executable` and `ExecutablePool` table engines allow you to define a table whose rows are generated from a script that you define (by writing rows to **stdout**). The executable script is stored in the `user_scripts` directory and can read data from any source.
- `Executable` tables: the script is run on every query
- `ExecutablePool` tables: a pool of persistent processes is maintained, and processes are taken from the pool for reads
You can optionally include one or more input queries that stream their results to **stdin** for the script to read.
## Creating an Executable Table
The `Executable` table engine requires two parameters: the name of the script and the format of the incoming data. You can optionally pass in one or more input queries:
```sql
Executable(script_name, format, [input_query...])
```
Here are the relevant settings for an `Executable` table:
- `send_chunk_header`
- Description: Send the number of rows in each chunk before sending the chunk to the process. This setting can help you write your script more efficiently by preallocating some resources
- Default value: false
- `command_termination_timeout`
- Description: Command termination timeout in seconds
- Default value: 10
- `command_read_timeout`
- Description: Timeout for reading data from command stdout in milliseconds
- Default value: 10000
- `command_write_timeout`
- Description: Timeout for writing data to command stdin in milliseconds
- Default value: 10000
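These settings are applied with a `SETTINGS` clause on the `CREATE TABLE` statement, the same way `pool_size` is set in the `ExecutablePool` example further down this page. A minimal sketch, reusing the table and script from the example that follows (the specific values are only illustrative):
```sql
CREATE TABLE my_executable_table (
    x UInt32,
    y String
)
ENGINE = Executable('my_script.py', TabSeparated, (SELECT 10))
SETTINGS send_chunk_header = 1, command_read_timeout = 20000;
```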
Let's look at an example. The following Python script is named `my_script.py` and is saved in the `user_scripts` folder. It reads in a number `i` and prints `i` random strings, with each string preceded by a number that is separated by a tab:
```python
#!/usr/bin/python3
import sys
import string
import random
def main():
# Read input value
for number in sys.stdin:
i = int(number)
# Generate some random rows
for id in range(0, i):
letters = string.ascii_letters
random_string = ''.join(random.choices(letters, k=10))
print(str(id) + '\t' + random_string + '\n', end='')
# Flush results to stdout
sys.stdout.flush()
if __name__ == "__main__":
main()
```
The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings every time you run a `SELECT` from `my_executable_table`:
```sql
CREATE TABLE my_executable_table (
x UInt32,
y String
)
ENGINE = Executable('my_script.py', TabSeparated, (SELECT 10))
```
Creating the table returns immediately and does not invoke the script. Querying `my_executable_table` causes the script to be invoked:
```sql
SELECT * FROM my_executable_table
```
```response
┌─x─┬─y──────────┐
│ 0 │ BsnKBsNGNH │
│ 1 │ mgHfBCUrWM │
│ 2 │ iDQAVhlygr │
│ 3 │ uNGwDuXyCk │
│ 4 │ GcFdQWvoLB │
│ 5 │ UkciuuOTVO │
│ 6 │ HoKeCdHkbs │
│ 7 │ xRvySxqAcR │
│ 8 │ LKbXPHpyDI │
│ 9 │ zxogHTzEVV │
└───┴────────────┘
```
## Passing Query Results to a Script
Users of the Hacker News website leave comments. The Python natural language processing toolkit (`nltk`) provides a `SentimentIntensityAnalyzer` for determining whether comments are positive, negative, or neutral, assigning a score between -1 (a very negative comment) and 1 (a very positive comment). Let's create an `Executable` table that computes the sentiment of Hacker News comments using `nltk`.
This example uses the `hackernews` table described [here](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/invertedindexes/#full-text-search-of-the-hacker-news-dataset). The `hackernews` table includes an `id` column of type `UInt64` and a `String` column named `comment`. Let's start by defining the `Executable` table:
```sql
CREATE TABLE sentiment (
id UInt64,
sentiment Float32
)
ENGINE = Executable(
'sentiment.py',
TabSeparated,
(SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20)
);
```
Some comments about the `sentiment` table:
- The file `sentiment.py` is saved in the `user_scripts` folder (the default folder of the `user_scripts_path` setting)
- The `TabSeparated` format means our Python script needs to generate rows of raw data that contain tab-separated values
- The query selects two columns from `hackernews`. The Python script will need to parse out those column values from the incoming rows
Here is the definition of `sentiment.py`:
```python
#!/usr/local/bin/python3.9
import sys
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
def main():
sentiment_analyzer = SentimentIntensityAnalyzer()
while True:
try:
row = sys.stdin.readline()
if row == '':
break
split_line = row.split("\t")
id = str(split_line[0])
comment = split_line[1]
score = sentiment_analyzer.polarity_scores(comment)['compound']
print(id + '\t' + str(score) + '\n', end='')
sys.stdout.flush()
except BaseException as x:
break
if __name__ == "__main__":
main()
```
Some comments about our Python script:
- For this to work, you will need to run `nltk.downloader.download('vader_lexicon')`. This could have been placed in the script, but then it would have been downloaded every time a query was executed on the `sentiment` table - which is not efficient
- Each value of `row` is going to be a row in the result set of `SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20`
- The incoming row is tab-separated, so we parse out the `id` and `comment` using the Python `split` function
- The result of `polarity_scores` is a JSON object with a handful of values. We decided to just grab the `compound` value of this JSON object
- Recall that the `sentiment` table in ClickHouse uses the `TabSeparated` format and contains two columns, so our `print` function separates those columns with a tab
Every time you write a query that selects rows from the `sentiment` table, the `SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20` query is executed and the result is passed to `sentiment.py`. Let's test it out:
```sql
SELECT *
FROM sentiment
```
The response looks like:
```response
┌───────id─┬─sentiment─┐
│ 7398199 │ 0.4404 │
│ 21640317 │ 0.1779 │
│ 21462000 │ 0 │
│ 25168863 │ 0 │
│ 25168978 │ -0.1531 │
│ 25169359 │ 0 │
│ 25169394 │ -0.9231 │
│ 25169766 │ 0.4137 │
│ 25172570 │ 0.7469 │
│ 25173687 │ 0.6249 │
│ 28291534 │ 0 │
│ 28291669 │ -0.4767 │
│ 28291731 │ 0 │
│ 28291949 │ -0.4767 │
│ 28292004 │ 0.3612 │
│ 28292050 │ -0.296 │
│ 28292322 │ 0 │
│ 28295172 │ 0.7717 │
│ 28295288 │ 0.4404 │
│ 21465723 │ -0.6956 │
└──────────┴───────────┘
```
## Creating an ExecutablePool Table
The syntax for `ExecutablePool` is similar to `Executable`, but there are a couple of relevant settings unique to an `ExecutablePool` table:
- `pool_size`
- Description: Size of the process pool. If the size is 0, then there are no size restrictions
- Default value: 16
- `max_command_execution_time`
- Description: Max command execution time in seconds
- Default value: 10
We can easily convert the `sentiment` table above to use `ExecutablePool` instead of `Executable`:
```sql
CREATE TABLE sentiment_pooled (
id UInt64,
sentiment Float32
)
ENGINE = ExecutablePool(
'sentiment.py',
TabSeparated,
(SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20000)
)
SETTINGS
pool_size = 4;
```
ClickHouse maintains 4 processes on demand when your client queries the `sentiment_pooled` table.
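Querying the pooled table looks the same as querying the `Executable` version; the difference is that repeated queries reuse the already-running script processes instead of launching the script again for every query. A sketch of a query against the table defined above:
```sql
SELECT *
FROM sentiment_pooled
LIMIT 10;
```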

View File

@ -1279,7 +1279,9 @@ The following settings are available:
- `size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`.
- `max_entry_size`: The maximum size in bytes `SELECT` query results may have to be saved in the cache. Default value: `1048576` (1 MiB).
- `max_entry_records`: The maximum number of records `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil).
- `max_entry_rows`: The maximum number of rows `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil).
Changed settings take effect immediately.
:::warning
Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query cache altogether.
@ -1292,7 +1294,7 @@ Data for the query cache is allocated in DRAM. If memory is scarce, make sure to
<size>1073741824</size>
<max_entries>1024</max_entries>
<max_entry_size>1048576</max_entry_size>
<max_entry_records>30000000</max_entry_records>
<max_entry_rows>30000000</max_entry_rows>
</query_cache>
```

View File

@ -1,5 +1,5 @@
---
slug: /en/sql-reference/aggregate-functions/reference/sparkbar
sidebar_position: 311
sidebar_label: sparkbar
---
@ -7,9 +7,11 @@ sidebar_label: sparkbar
# sparkbar
The function plots a frequency histogram for values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`.
Repetitions for all `x` falling into the same bucket are averaged, so data should be pre-aggregated.
Negative repetitions are ignored.
If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` as the interval end.
Otherwise, values outside the interval are ignored.
**Syntax**
@ -37,29 +39,24 @@ sparkbar(width[, min_x, max_x])(x, y)
Query:
``` sql
CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192;
INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11');
CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date;
SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data;
INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11');
SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data;
SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
```
Result:
``` text
┌─sparkbar(9)(event_date, cnt)─┐
│ │
│ ▁▅▄▃██▅ ▁ │
│ │
│ ▂▅▂▃▆█ ▂ │
└──────────────────────────────┘
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
│ │
│▁▄▄▂▅▇█▁ │
│ │
│ ▂▅▂▃▇▆█ │
└──────────────────────────────────────────────────────────────────────────┘
```

View File

@ -0,0 +1,97 @@
---
slug: /en/engines/table-functions/executable
sidebar_position: 55
sidebar_label: executable
keywords: [udf, user defined function, clickhouse, executable, table, function]
---
# executable Table Function for UDFs
The `executable` table function creates a table based on the output of a user-defined function (UDF): a script that you write, which outputs rows to **stdout**. The executable script is stored in the `user_scripts` directory and can read data from any source.
You can optionally include one or more input queries that stream their results to **stdin** for the script to read.
:::note
A key difference between ordinary UDF functions and the `executable` table function (and the `Executable` table engine) is that ordinary UDF functions cannot change the row count. For example, if the input is 100 rows, then the result must return 100 rows. When using the `executable` table function or the `Executable` table engine, your script can make any data transformations you want, including complex aggregations.
:::
## Syntax
The `executable` table function requires three parameters and accepts an optional list of input queries:
```sql
executable(script_name, format, structure, [input_query...])
```
- `script_name`: the file name of the script, saved in the `user_scripts` folder (the default folder of the `user_scripts_path` setting)
- `format`: the format of the generated table
- `structure`: the table schema of the generated table
- `input_query`: an optional query (or collection of queries) whose results are passed to the script via **stdin**
:::note
If you are going to invoke the same script repeatedly with the same input queries, consider using the [`Executable` table engine](../../engines/table-engines/special/executable.md).
:::
The following Python script is named `generate_random.py` and is saved in the `user_scripts` folder. It reads in a number `i` and prints `i` random strings, with each string preceded by a number that is separated by a tab:
```python
#!/usr/local/bin/python3.9
import sys
import string
import random
def main():
# Read input value
for number in sys.stdin:
i = int(number)
# Generate some random rows
for id in range(0, i):
letters = string.ascii_letters
random_string = ''.join(random.choices(letters, k=10))
print(str(id) + '\t' + random_string + '\n', end='')
# Flush results to stdout
sys.stdout.flush()
if __name__ == "__main__":
main()
```
Let's invoke the script and have it generate 10 random strings:
```sql
SELECT * FROM executable('generate_random.py', TabSeparated, 'id UInt32, random String', (SELECT 10))
```
The response looks like:
```response
┌─id─┬─random─────┐
│ 0 │ xheXXCiSkH │
│ 1 │ AqxvHAoTrl │
│ 2 │ JYvPCEbIkY │
│ 3 │ sWgnqJwGRm │
│ 4 │ fTZGrjcLon │
│ 5 │ ZQINGktPnd │
│ 6 │ YFSvGGoezb │
│ 7 │ QyMJJZOOia │
│ 8 │ NfiyDDhmcI │
│ 9 │ REJRdJpWrg │
└────┴────────────┘
```
## Passing Query Results to a Script
Be sure to check out the example in the `Executable` table engine documentation on [how to pass query results to a script](../../engines/table-engines/special/executable#passing-query-results-to-a-script). Here is how you execute the same script from that example using the `executable` table function:
```sql
SELECT * FROM executable(
'sentiment.py',
TabSeparated,
'id UInt64, sentiment Float32',
(SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20)
);
```

View File

@ -1,14 +1,15 @@
---
slug: /ru/sql-reference/aggregate-functions/reference/sparkbar
sidebar_position: 311
sidebar_label: sparkbar
---
# sparkbar {#sparkbar}
The function plots a frequency histogram for the given values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`.
The function plots a frequency histogram for the given values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`. Repetitions for all `x` that fall into the same bucket are averaged, so the data should be pre-aggregated. Negative repetitions are ignored.
If no interval is specified, the minimum value of `x` is used as the lower bound of the interval and the maximum value of `x` as the upper bound.
Values of `x` outside the specified interval are ignored.
**Syntax**
@ -39,29 +40,23 @@ sparkbar(width[, min_x, max_x])(x, y)
Query:
``` sql
CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192;
INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11');
CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date;
SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data;
INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11');
SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data;
SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
```
Result:
``` text
┌─sparkbar(9)(event_date, cnt)─┐
│ │
│ ▁▅▄▃██▅ ▁ │
│ │
│ ▂▅▂▃▆█ ▂ │
└──────────────────────────────┘
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
│ │
│▁▄▄▂▅▇█▁ │
│ │
│ ▂▅▂▃▇▆█ │
└──────────────────────────────────────────────────────────────────────────┘
```

View File

@ -102,7 +102,8 @@ done
EOF
chmod +x "$PKG_PATH/install/doinst.sh"
if [ -f "$PKG_PATH/DEBIAN/postinst" ]; then
tail +2 "$PKG_PATH/DEBIAN/postinst" >> "$PKG_PATH/install/doinst.sh"
# we don't need debconf source in doinst in any case
tail +2 "$PKG_PATH/DEBIAN/postinst" | grep -v debconf/confmodule >> "$PKG_PATH/install/doinst.sh"
fi
rm -rf "$PKG_PATH/DEBIAN"
if [ -f "/usr/bin/pigz" ]; then

View File

@ -0,0 +1,46 @@
#!/bin/sh
set -e
# set -x
PROGRAM=clickhouse-keeper
KEEPER_USER=${KEEPER_USER:=clickhouse}
KEEPER_GROUP=${KEEPER_GROUP:=clickhouse}
# Please note that we don't support paths with whitespaces. This is rather ignorant.
KEEPER_CONFDIR=${KEEPER_CONFDIR:=/etc/$PROGRAM}
KEEPER_DATADIR=${KEEPER_DATADIR:=/var/lib/clickhouse}
KEEPER_LOGDIR=${KEEPER_LOGDIR:=/var/log/$PROGRAM}
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
[ -f /etc/default/clickhouse-keeper ] && . /etc/default/clickhouse-keeper
if [ ! -f "/etc/debian_version" ]; then
not_deb_os=1
fi
if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
if ! getent group "${KEEPER_GROUP}" > /dev/null 2>&1 ; then
groupadd --system "${KEEPER_GROUP}"
fi
GID=$(getent group "${KEEPER_GROUP}" | cut -d: -f 3)
if ! id "${KEEPER_USER}" > /dev/null 2>&1 ; then
adduser --system --home /dev/null --no-create-home \
--gid "${GID}" --shell /bin/false \
"${KEEPER_USER}"
fi
chown -R "${KEEPER_USER}:${KEEPER_GROUP}" "${KEEPER_CONFDIR}"
chmod 0755 "${KEEPER_CONFDIR}"
if ! [ -d "${KEEPER_DATADIR}" ]; then
mkdir -p "${KEEPER_DATADIR}"
chown -R "${KEEPER_USER}:${KEEPER_GROUP}" "${KEEPER_DATADIR}"
chmod 0700 "${KEEPER_DATADIR}"
fi
if ! [ -d "${KEEPER_LOGDIR}" ]; then
mkdir -p "${KEEPER_LOGDIR}"
chown -R "${KEEPER_USER}:${KEEPER_GROUP}" "${KEEPER_LOGDIR}"
chmod 0770 "${KEEPER_LOGDIR}"
fi
fi
# vim: ts=4: sw=4: sts=4: expandtab

View File

@ -0,0 +1,27 @@
[Unit]
Description=ClickHouse Keeper - zookeeper compatible distributed coordination server
Requires=network-online.target
# NOTE: After/Wants=time-sync.target is not enough; you need to ensure
# that the time has already been adjusted. If you use systemd-timesyncd you are
# safe, but if you use ntp or some other daemon, you should configure it
# additionally.
After=time-sync.target network-online.target
Wants=time-sync.target
[Service]
Type=simple
User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-keeper --config=/etc/clickhouse-keeper/keeper_config.xml --pid-file=%t/%p/%p.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/%p
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
[Install]
# ClickHouse should not start from the rescue shell (rescue.target).
WantedBy=multi-user.target

View File

@ -30,6 +30,8 @@ contents:
type: config|noreplace
- src: root/usr/bin/clickhouse-keeper
dst: /usr/bin/clickhouse-keeper
- src: clickhouse-keeper.service
dst: /lib/systemd/system/clickhouse-keeper.service
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-keeper/AUTHORS
@ -39,3 +41,6 @@ contents:
dst: /usr/share/doc/clickhouse-keeper/LICENSE
- src: ../README.md
dst: /usr/share/doc/clickhouse-keeper/README.md
scripts:
postinstall: ./clickhouse-keeper.postinstall

View File

@ -11,8 +11,6 @@ CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR:=/var/lib/clickhouse}
CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR:=/var/log/clickhouse-server}
CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin}
CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule

View File

@ -17,10 +17,10 @@ User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
RuntimeDirectory=clickhouse-server
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/clickhouse
EnvironmentFile=-/etc/default/%p
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE

View File

@ -1,3 +1,4 @@
#include <boost/algorithm/string/join.hpp>
#include <cstdlib>
#include <fcntl.h>
#include <map>
@ -538,24 +539,28 @@ void Client::connect()
// Prints changed settings to stderr. Useful for debugging fuzzing failures.
void Client::printChangedSettings() const
{
const auto & changes = global_context->getSettingsRef().changes();
if (!changes.empty())
auto print_changes = [](const auto & changes, std::string_view settings_name)
{
fmt::print(stderr, "Changed settings: ");
for (size_t i = 0; i < changes.size(); ++i)
if (!changes.empty())
{
if (i)
fmt::print(stderr, "Changed {}: ", settings_name);
for (size_t i = 0; i < changes.size(); ++i)
{
fmt::print(stderr, ", ");
if (i)
fmt::print(stderr, ", ");
fmt::print(stderr, "{} = '{}'", changes[i].name, toString(changes[i].value));
}
fmt::print(stderr, "{} = '{}'", changes[i].name, toString(changes[i].value));
fmt::print(stderr, "\n");
}
fmt::print(stderr, "\n");
}
else
{
fmt::print(stderr, "No changed settings.\n");
}
else
{
fmt::print(stderr, "No changed {}.\n", settings_name);
}
};
print_changes(global_context->getSettingsRef().changes(), "settings");
print_changes(cmd_merge_tree_settings.changes(), "MergeTree settings");
}
@ -1387,6 +1392,8 @@ void Client::readArguments(
}
else if (arg == "--allow_repeated_settings")
allow_repeated_settings = true;
else if (arg == "--allow_merge_tree_settings")
allow_merge_tree_settings = true;
else
common_arguments.emplace_back(arg);
}

View File

@ -362,6 +362,7 @@ try
else
path = std::filesystem::path{KEEPER_DEFAULT_PATH};
std::filesystem::create_directories(path);
/// Check that the process user id matches the owner of the data.
const auto effective_user_id = geteuid();

View File

@ -82,9 +82,7 @@
#include <Common/ThreadFuzzer.h>
#include <Common/getHashOfLoadedBinary.h>
#include <Common/filesystemHelpers.h>
#if USE_BORINGSSL
#include <Compression/CompressionCodecEncrypted.h>
#endif
#include <Server/HTTP/HTTPServerConnectionFactory.h>
#include <Server/MySQLHandlerFactory.h>
#include <Server/PostgreSQLHandlerFactory.h>
@ -1348,9 +1346,8 @@ try
global_context->updateStorageConfiguration(*config);
global_context->updateInterserverCredentials(*config);
#if USE_BORINGSSL
global_context->updateQueryCacheConfiguration(*config);
CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs");
#endif
#if USE_SSL
CertificateReloader::instance().tryLoad(*config);
#endif
@ -1534,13 +1531,7 @@ try
global_context->setMMappedFileCache(mmap_cache_size);
/// A cache for query results.
size_t query_cache_size = config().getUInt64("query_cache.size", 1_GiB);
if (query_cache_size)
global_context->setQueryCache(
query_cache_size,
config().getUInt64("query_cache.max_entries", 1024),
config().getUInt64("query_cache.max_entry_size", 1_MiB),
config().getUInt64("query_cache.max_entry_records", 30'000'000));
global_context->setQueryCache(config());
#if USE_EMBEDDED_COMPILER
/// 128 MB
@ -1564,10 +1555,8 @@ try
global_context->getMergeTreeSettings().sanityCheck(background_pool_tasks);
global_context->getReplicatedMergeTreeSettings().sanityCheck(background_pool_tasks);
}
#if USE_BORINGSSL
/// Try to set up encryption. If there are errors in the config, an error will be printed and the server won't start.
CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs");
#endif
SCOPE_EXIT({
async_metrics.stop();

View File

@ -1516,7 +1516,7 @@
<!-- <size>1073741824</size> -->
<!-- <max_entries>1024</max_entries> -->
<!-- <max_entry_size>1048576</max_entry_size> -->
<!-- <max_entry_records>30000000</max_entry_records> -->
<!-- <max_entry_rows>30000000</max_entry_rows> -->
<!-- </query_cache> -->
<!-- Uncomment if enable merge tree metadata cache -->

View File

@ -10,22 +10,25 @@ mod ffi {
}
struct Item {
text: String,
text_no_newlines: String,
orig_text: String,
}
impl Item {
fn new(text: String) -> Self {
return Self{
// Text that will be printed by skim, and will be used for matching.
//
// The text that will be shown should not contain newlines, since in that case skim may
// leave some symbols on the screen, which looks odd.
text: text.replace("\n", " "),
text_no_newlines: text.replace("\n", " "),
// This will be used when the match had been selected.
orig_text: text,
};
}
}
impl SkimItem for Item {
fn text(&self) -> Cow<str> {
return Cow::Borrowed(&self.text);
return Cow::Borrowed(&self.text_no_newlines);
}
fn output(&self) -> Cow<str> {
@ -44,6 +47,24 @@ fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, Stri
.query(Some(prefix.to_str().unwrap()))
.tac(true)
.tiebreak(Some("-score".to_string()))
// Exact mode performs better for SQL.
//
// The default fuzzy search is too smart for SQL; it even takes case into account, which
// should not matter (you don't want to have to type "SELECT" instead of "select" to find
// the query).
//
// Exact matching seems to be a better algorithm for SQL. It is not 100% exact: it splits
// the input by spaces and applies a separate matcher for each word.
// Note that if you think space alone is not enough as the delimiter, keep in mind that it
// is the delimiter only for the input query, so to match "system.query_log" you can type
// "sy qu log".
// Exact mode should also be more familiar to users who do not know how to use fuzzy search
// (and you can disable it by prepending the "'" char).
//
// It also ignores case correctly, i.e. it does not have a penalty for case mismatch,
// unlike fuzzy algorithms (take a look at SkimScoreConfig::penalty_case_mismatch).
.exact(true)
.case(CaseMatching::Ignore)
.build()
.unwrap();

View File

@ -247,15 +247,8 @@ void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
if (adam_rhs.average_gradient.empty())
return;
if (average_gradient.empty())
{
if (!average_squared_gradient.empty() ||
adam_rhs.average_gradient.size() != adam_rhs.average_squared_gradient.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Average_gradient and average_squared_gradient must have same size");
average_gradient.resize(adam_rhs.average_gradient.size(), Float64{0.0});
average_squared_gradient.resize(adam_rhs.average_squared_gradient.size(), Float64{0.0});
}
average_gradient.resize(adam_rhs.average_gradient.size(), Float64{0.0});
average_squared_gradient.resize(adam_rhs.average_squared_gradient.size(), Float64{0.0});
for (size_t i = 0; i < average_gradient.size(); ++i)
{
@ -268,14 +261,8 @@ void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
void Adam::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
{
if (average_gradient.empty())
{
if (!average_squared_gradient.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Average_gradient and average_squared_gradient must have same size");
average_gradient.resize(batch_gradient.size(), Float64{0.0});
average_squared_gradient.resize(batch_gradient.size(), Float64{0.0});
}
average_gradient.resize(batch_gradient.size(), Float64{0.0});
average_squared_gradient.resize(batch_gradient.size(), Float64{0.0});
for (size_t i = 0; i != average_gradient.size(); ++i)
{
@ -328,8 +315,7 @@ void Nesterov::write(WriteBuffer & buf) const
void Nesterov::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
{
const auto & nesterov_rhs = static_cast<const Nesterov &>(rhs);
if (accumulated_gradient.empty())
accumulated_gradient.resize(nesterov_rhs.accumulated_gradient.size(), Float64{0.0});
accumulated_gradient.resize(nesterov_rhs.accumulated_gradient.size(), Float64{0.0});
for (size_t i = 0; i < accumulated_gradient.size(); ++i)
{
@ -339,10 +325,7 @@ void Nesterov::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac
void Nesterov::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
{
if (accumulated_gradient.empty())
{
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
}
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
for (size_t i = 0; i < batch_gradient.size(); ++i)
{
@ -402,10 +385,7 @@ void Momentum::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac
void Momentum::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
{
/// batch_size is already checked to be greater than 0
if (accumulated_gradient.empty())
{
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
}
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
for (size_t i = 0; i < batch_gradient.size(); ++i)
{

View File

@ -149,9 +149,11 @@ public:
class Momentum : public IWeightsUpdater
{
public:
Momentum() = default;
explicit Momentum(Float64 alpha_) : alpha(alpha_) {}
explicit Momentum(size_t num_params, Float64 alpha_ = 0.1) : alpha(alpha_)
{
accumulated_gradient.resize(num_params + 1, 0);
}
void update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient) override;
@ -170,9 +172,10 @@ private:
class Nesterov : public IWeightsUpdater
{
public:
Nesterov() = default;
explicit Nesterov(Float64 alpha_) : alpha(alpha_) {}
explicit Nesterov(size_t num_params, Float64 alpha_ = 0.9) : alpha(alpha_)
{
accumulated_gradient.resize(num_params + 1, 0);
}
void addToBatch(
std::vector<Float64> & batch_gradient,
@ -201,10 +204,14 @@ private:
class Adam : public IWeightsUpdater
{
public:
Adam()
Adam(size_t num_params)
{
beta1_powered = beta1;
beta2_powered = beta2;
average_gradient.resize(num_params + 1, 0);
average_squared_gradient.resize(num_params + 1, 0);
}
void addToBatch(
@ -338,11 +345,11 @@ public:
if (weights_updater_name == "SGD")
new_weights_updater = std::make_shared<StochasticGradientDescent>();
else if (weights_updater_name == "Momentum")
new_weights_updater = std::make_shared<Momentum>();
new_weights_updater = std::make_shared<Momentum>(param_num);
else if (weights_updater_name == "Nesterov")
new_weights_updater = std::make_shared<Nesterov>();
new_weights_updater = std::make_shared<Nesterov>(param_num);
else if (weights_updater_name == "Adam")
new_weights_updater = std::make_shared<Adam>();
new_weights_updater = std::make_shared<Adam>(param_num);
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal name of weights updater (should have been checked earlier)");

View File

@ -50,11 +50,13 @@ AggregateFunctionPtr createAggregateFunctionSparkbar(const std::string & name, c
assertBinary(name, arguments);
if (params.size() != 1 && params.size() != 3)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The number of params does not match for aggregate function {}", name);
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"The number of params does not match for aggregate function '{}', expected 1 or 3, got {}", name, params.size());
if (params.size() == 3)
{
if (params.at(1).getType() != arguments[0]->getDefault().getType() || params.at(2).getType() != arguments[0]->getDefault().getType())
if (params.at(1).getType() != arguments[0]->getDefault().getType() ||
params.at(2).getType() != arguments[0]->getDefault().getType())
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The second and third parameters are not the same type as the first arguments for aggregate function {}", name);
@ -63,7 +65,6 @@ AggregateFunctionPtr createAggregateFunctionSparkbar(const std::string & name, c
return createAggregateFunctionSparkbarImpl(name, *arguments[0], *arguments[1], arguments, params);
}
}
void registerAggregateFunctionSparkbar(AggregateFunctionFactory & factory)

View File

@ -18,10 +18,15 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
template<typename X, typename Y>
struct AggregateFunctionSparkbarData
{
/// TODO: calculate histogram instead of storing all points
using Points = HashMap<X, Y>;
Points points;
@ -31,20 +36,26 @@ struct AggregateFunctionSparkbarData
Y min_y = std::numeric_limits<Y>::max();
Y max_y = std::numeric_limits<Y>::lowest();
void insert(const X & x, const Y & y)
Y insert(const X & x, const Y & y)
{
auto result = points.insert({x, y});
if (!result.second)
result.first->getMapped() += y;
if (isNaN(y) || y <= 0)
return 0;
auto [it, inserted] = points.insert({x, y});
if (!inserted)
it->getMapped() += y;
return it->getMapped();
}
void add(X x, Y y)
{
insert(x, y);
auto new_y = insert(x, y);
min_x = std::min(x, min_x);
max_x = std::max(x, max_x);
min_y = std::min(y, min_y);
max_y = std::max(y, max_y);
max_y = std::max(new_y, max_y);
}
void merge(const AggregateFunctionSparkbarData & other)
@ -53,10 +64,14 @@ struct AggregateFunctionSparkbarData
return;
for (auto & point : other.points)
insert(point.getKey(), point.getMapped());
{
auto new_y = insert(point.getKey(), point.getMapped());
max_y = std::max(new_y, max_y);
}
min_x = std::min(other.min_x, min_x);
max_x = std::max(other.max_x, max_x);
min_y = std::min(other.min_y, min_y);
max_y = std::max(other.max_y, max_y);
}
@ -85,7 +100,6 @@ struct AggregateFunctionSparkbarData
size_t size;
readVarUInt(size, buf);
/// TODO Protection against huge size
X x;
Y y;
for (size_t i = 0; i < size; ++i)
@ -95,7 +109,6 @@ struct AggregateFunctionSparkbarData
insert(x, y);
}
}
};
template<typename X, typename Y>
@ -104,16 +117,17 @@ class AggregateFunctionSparkbar final
{
private:
size_t width;
X min_x;
X max_x;
bool specified_min_max_x;
const size_t width = 0;
template <class T>
size_t updateFrame(ColumnString::Chars & frame, const T value) const
/// Range for x specified in parameters.
const bool is_specified_range_x = false;
const X begin_x = std::numeric_limits<X>::min();
const X end_x = std::numeric_limits<X>::max();
size_t updateFrame(ColumnString::Chars & frame, Y value) const
{
static constexpr std::array<std::string_view, 9> bars{" ", "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"};
const auto & bar = (isNaN(value) || value > 8 || value < 1) ? bars[0] : bars[static_cast<UInt8>(value)];
const auto & bar = (isNaN(value) || value < 1 || 8 < value) ? bars[0] : bars[static_cast<UInt8>(value)];
frame.insert(bar.begin(), bar.end());
return bar.size();
}
@ -122,161 +136,108 @@ private:
* The minimum value of y is rendered as the lowest height "▁",
* the maximum value of y is rendered as the highest height "█", and the middle value will be rendered proportionally.
* If a bucket has no y value, it will be rendered as " ".
* If the actual number of buckets is greater than the specified bucket, it will be compressed by width.
* For example, there are actually 11 buckets, specify 10 buckets, and divide the 11 buckets as follows (11/10):
* 0.0-1.1, 1.1-2.2, 2.2-3.3, 3.3-4.4, 4.4-5.5, 5.5-6.6, 6.6-7.7, 7.7-8.8, 8.8-9.9, 9.9-11.
* The y value of the first bucket will be calculated as follows:
* the actual y value of the first position + the actual second position y*0.1, and the remaining y*0.9 is reserved for the next bucket.
* The next bucket will use the last y*0.9 + the actual third position y*0.2, and the remaining y*0.8 will be reserved for the next bucket. And so on.
*/
void render(ColumnString & to_column, const AggregateFunctionSparkbarData<X, Y> & data) const
{
size_t sz = 0;
auto & values = to_column.getChars();
auto & offsets = to_column.getOffsets();
auto update_column = [&] ()
if (data.points.empty())
{
values.push_back('\0');
offsets.push_back(offsets.empty() ? 1 : offsets.back() + 1);
return;
}
auto from_x = is_specified_range_x ? begin_x : data.min_x;
auto to_x = is_specified_range_x ? end_x : data.max_x;
if (from_x >= to_x)
{
size_t sz = updateFrame(values, 8);
values.push_back('\0');
offsets.push_back(offsets.empty() ? sz + 1 : offsets.back() + sz + 1);
};
if (data.points.empty() || !width)
return update_column();
size_t diff_x;
X min_x_local;
if (specified_min_max_x)
{
diff_x = max_x - min_x;
min_x_local = min_x;
}
else
{
diff_x = data.max_x - data.min_x;
min_x_local = data.min_x;
return;
}
if ((diff_x + 1) <= width)
{
Y min_y = data.min_y;
Y max_y = data.max_y;
Float64 diff_y = max_y - min_y;
PaddedPODArray<Y> histogram(width, 0);
PaddedPODArray<UInt64> fhistogram(width, 0);
if (diff_y != 0.0)
for (const auto & point : data.points)
{
if (point.getKey() < from_x || to_x < point.getKey())
continue;
X delta = to_x - from_x;
if (delta < std::numeric_limits<X>::max())
delta = delta + 1;
X value = point.getKey() - from_x;
Float64 w = histogram.size();
size_t index = std::min<size_t>(static_cast<size_t>(w / delta * value), histogram.size() - 1);
if (std::numeric_limits<Y>::max() - histogram[index] > point.getMapped())
{
for (size_t i = 0; i <= diff_x; ++i)
{
auto it = data.points.find(static_cast<X>(min_x_local + i));
bool found = it != data.points.end();
sz += updateFrame(values, found ? std::round(((it->getMapped() - min_y) / diff_y) * 7) + 1 : 0.0);
}
histogram[index] += point.getMapped();
fhistogram[index] += 1;
}
else
{
for (size_t i = 0; i <= diff_x; ++i)
sz += updateFrame(values, data.points.has(min_x_local + static_cast<X>(i)) ? 1 : 0);
/// In case of overflow, just saturate
histogram[index] = std::numeric_limits<Y>::max();
}
}
else
for (size_t i = 0; i < histogram.size(); ++i)
{
// begin reshapes to width buckets
Float64 multiple_d = (diff_x + 1) / static_cast<Float64>(width);
std::optional<Float64> min_y;
std::optional<Float64> max_y;
std::optional<Float64> new_y;
std::vector<std::optional<Float64>> new_points;
new_points.reserve(width);
std::pair<size_t, Float64> bound{0, 0.0};
size_t cur_bucket_num = 0;
// upper bound for bucket
auto upper_bound = [&](size_t bucket_num)
{
bound.second = (bucket_num + 1) * multiple_d;
bound.first = static_cast<size_t>(std::floor(bound.second));
};
upper_bound(cur_bucket_num);
for (size_t i = 0; i <= (diff_x + 1); ++i)
{
if (i == bound.first) // is bound
{
Float64 proportion = bound.second - bound.first;
auto it = data.points.find(min_x_local + static_cast<X>(i));
bool found = (it != data.points.end());
if (found && proportion > 0)
new_y = new_y.value_or(0) + it->getMapped() * proportion;
if (new_y)
{
Float64 avg_y = new_y.value() / multiple_d;
new_points.emplace_back(avg_y);
// If min_y has no value, or if the avg_y of the current bucket is less than min_y, update it.
if (!min_y || avg_y < min_y)
min_y = avg_y;
if (!max_y || avg_y > max_y)
max_y = avg_y;
}
else
{
new_points.emplace_back();
}
// next bucket
new_y = found ? ((1 - proportion) * it->getMapped()) : std::optional<Float64>();
upper_bound(++cur_bucket_num);
}
else
{
auto it = data.points.find(min_x_local + static_cast<X>(i));
if (it != data.points.end())
new_y = new_y.value_or(0) + it->getMapped();
}
}
if (!min_y || !max_y) // No value is set
return update_column();
Float64 diff_y = max_y.value() - min_y.value();
auto update_frame = [&] (const std::optional<Float64> & point_y)
{
sz += updateFrame(values, point_y ? std::round(((point_y.value() - min_y.value()) / diff_y) * 7) + 1 : 0);
};
auto update_frame_for_constant = [&] (const std::optional<Float64> & point_y)
{
sz += updateFrame(values, point_y ? 1 : 0);
};
if (diff_y != 0.0)
std::for_each(new_points.begin(), new_points.end(), update_frame);
else
std::for_each(new_points.begin(), new_points.end(), update_frame_for_constant);
if (fhistogram[i] > 0)
histogram[i] /= fhistogram[i];
}
update_column();
Y y_max = 0;
for (auto & y : histogram)
{
if (isNaN(y) || y <= 0)
continue;
y_max = std::max(y_max, y);
}
if (y_max == 0)
{
values.push_back('\0');
offsets.push_back(offsets.empty() ? 1 : offsets.back() + 1);
return;
}
for (auto & y : histogram)
{
if (isNaN(y) || y <= 0)
y = 0;
else
y = y * 7 / y_max + 1;
}
size_t sz = 0;
for (const auto & y : histogram)
sz += updateFrame(values, y);
values.push_back('\0');
offsets.push_back(offsets.empty() ? sz + 1 : offsets.back() + sz + 1);
}
public:
AggregateFunctionSparkbar(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionSparkbarData<X, Y>, AggregateFunctionSparkbar>(
arguments, params, std::make_shared<DataTypeString>())
: IAggregateFunctionDataHelper<AggregateFunctionSparkbarData<X, Y>, AggregateFunctionSparkbar>(arguments, params, std::make_shared<DataTypeString>())
, width(params.empty() ? 0 : params.at(0).safeGet<UInt64>())
, is_specified_range_x(params.size() >= 3)
, begin_x(is_specified_range_x ? static_cast<X>(params.at(1).safeGet<X>()) : std::numeric_limits<X>::min())
, end_x(is_specified_range_x ? static_cast<X>(params.at(2).safeGet<X>()) : std::numeric_limits<X>::max())
{
width = params.at(0).safeGet<UInt64>();
if (params.size() == 3)
{
specified_min_max_x = true;
min_x = static_cast<X>(params.at(1).safeGet<X>());
max_x = static_cast<X>(params.at(2).safeGet<X>());
}
else
{
specified_min_max_x = false;
min_x = std::numeric_limits<X>::min();
max_x = std::numeric_limits<X>::max();
}
if (width < 2 || 1024 < width)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter width must be in range [2, 1024]");
if (begin_x >= end_x)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter `min_x` must be less than `max_x`");
}
String getName() const override
@ -287,7 +248,7 @@ public:
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * /*arena*/) const override
{
X x = assert_cast<const ColumnVector<X> *>(columns[0])->getData()[row_num];
if (min_x <= x && x <= max_x)
if (begin_x <= x && x <= end_x)
{
Y y = assert_cast<const ColumnVector<Y> *>(columns[1])->getData()[row_num];
this->data(place).add(x, y);

View File

@ -953,7 +953,12 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
{
LOG_TRACE(log, "Will copy file {}", adjusted_path);
if (!num_entries)
bool has_entries = false;
{
std::lock_guard lock{mutex};
has_entries = num_entries > 0;
}
if (!has_entries)
checkLockFile(true);
if (use_archives)

View File

@ -226,13 +226,7 @@ add_object_library(clickhouse_access Access)
add_object_library(clickhouse_backups Backups)
add_object_library(clickhouse_core Core)
add_object_library(clickhouse_core_mysql Core/MySQL)
if (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)
add_headers_and_sources(dbms Compression)
list(REMOVE_ITEM dbms_headers Compression/CompressionCodecEncrypted.h)
list(REMOVE_ITEM dbms_sources Compression/CompressionCodecEncrypted.cpp)
else ()
add_object_library(clickhouse_compression Compression)
endif ()
add_object_library(clickhouse_compression Compression)
add_object_library(clickhouse_querypipeline QueryPipeline)
add_object_library(clickhouse_datatypes DataTypes)
add_object_library(clickhouse_datatypes_serializations DataTypes/Serializations)

View File

@ -41,6 +41,7 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTColumnDeclaration.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Processors/Formats/Impl/NullFormat.h>
@ -816,17 +817,15 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr parsed_query)
{
if (fake_drop)
{
if (parsed_query->as<ASTDropQuery>())
return;
}
if (fake_drop && parsed_query->as<ASTDropQuery>())
return;
auto query = query_to_execute;
/// Rewrite query only when we have query parameters.
/// Note that if query is rewritten, comments in query are lost.
/// But the user often wants to see comments in server logs, query log, processlist, etc.
/// For recent versions of the server query parameters will be transferred by network and applied on the server side.
auto query = query_to_execute;
if (!query_parameters.empty()
&& connection->getServerRevision(connection_parameters.timeouts) < DBMS_MIN_PROTOCOL_VERSION_WITH_PARAMETERS)
{
@ -838,6 +837,22 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr parsed_query)
query = serializeAST(*parsed_query);
}
if (allow_merge_tree_settings && parsed_query->as<ASTCreateQuery>())
{
/// Rewrite query if new settings were added.
if (addMergeTreeSettings(*parsed_query->as<ASTCreateQuery>()))
{
/// Replace query parameters because AST cannot be serialized otherwise.
if (!query_parameters.empty())
{
ReplaceQueryParameterVisitor visitor(query_parameters);
visitor.visit(parsed_query);
}
query = serializeAST(*parsed_query);
}
}
int retries_left = 10;
while (retries_left)
{
@ -2065,6 +2080,41 @@ void ClientBase::initQueryIdFormats()
}
bool ClientBase::addMergeTreeSettings(ASTCreateQuery & ast_create)
{
if (ast_create.attach
|| !ast_create.storage
|| !ast_create.storage->isExtendedStorageDefinition()
|| !ast_create.storage->engine
|| ast_create.storage->engine->name.find("MergeTree") == std::string::npos)
return false;
auto all_changed = cmd_merge_tree_settings.allChanged();
if (all_changed.begin() == all_changed.end())
return false;
if (!ast_create.storage->settings)
{
auto settings_ast = std::make_shared<ASTSetQuery>();
settings_ast->is_standalone = false;
ast_create.storage->set(ast_create.storage->settings, settings_ast);
}
auto & storage_settings = *ast_create.storage->settings;
bool added_new_setting = false;
for (const auto & setting : all_changed)
{
if (!storage_settings.changes.tryGet(setting.getName()))
{
storage_settings.changes.emplace_back(setting.getName(), setting.getValue());
added_new_setting = true;
}
}
return added_new_setting;
}
void ClientBase::runInteractive()
{
if (config().has("query_id"))
@ -2302,6 +2352,30 @@ void ClientBase::parseAndCheckOptions(OptionsDescription & options_description,
cmd_settings.addProgramOptionsAsMultitokens(options_description.main_description.value());
else
cmd_settings.addProgramOptions(options_description.main_description.value());
if (allow_merge_tree_settings)
{
/// Add merge tree settings manually, because names of some settings
/// may clash. Query settings have higher priority and we just
/// skip ambiguous merge tree settings.
auto & main_options = options_description.main_description.value();
NameSet main_option_names;
for (const auto & option : main_options.options())
main_option_names.insert(option->long_name());
for (const auto & setting : cmd_merge_tree_settings.all())
{
if (main_option_names.contains(setting.getName()))
continue;
if (allow_repeated_settings)
cmd_merge_tree_settings.addProgramOptionAsMultitoken(main_options, setting);
else
cmd_merge_tree_settings.addProgramOption(main_options, setting);
}
}
/// Parse main commandline options.
auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()).allow_unregistered();
po::parsed_options parsed = parser.run();

View File

@ -1,6 +1,7 @@
#pragma once
#include "Common/NamePrompter.h"
#include <Parsers/ASTCreateQuery.h>
#include <Common/ProgressIndication.h>
#include <Common/InterruptListener.h>
#include <Common/ShellCommand.h>
@ -14,6 +15,7 @@
#include <boost/program_options.hpp>
#include <Storages/StorageFile.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
namespace po = boost::program_options;
@ -164,6 +166,7 @@ private:
void updateSuggest(const ASTPtr & ast);
void initQueryIdFormats();
bool addMergeTreeSettings(ASTCreateQuery & ast_create);
protected:
static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context);
@ -212,6 +215,7 @@ protected:
/// Settings specified via command line args
Settings cmd_settings;
MergeTreeSettings cmd_merge_tree_settings;
/// thread status should be destructed before shared context because it relies on process list.
std::optional<ThreadStatus> thread_status;
@ -298,6 +302,7 @@ protected:
std::vector<HostAndPort> hosts_and_ports{};
bool allow_repeated_settings = false;
bool allow_merge_tree_settings = false;
bool cancelled = false;

View File

@ -1,5 +1,6 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnNullable.h>
@ -549,19 +550,48 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng
ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) const
{
if (typeid_cast<const ColumnUInt8 *>(data.get())) return filterNumber<UInt8>(filt, result_size_hint);
if (typeid_cast<const ColumnUInt16 *>(data.get())) return filterNumber<UInt16>(filt, result_size_hint);
if (typeid_cast<const ColumnUInt32 *>(data.get())) return filterNumber<UInt32>(filt, result_size_hint);
if (typeid_cast<const ColumnUInt64 *>(data.get())) return filterNumber<UInt64>(filt, result_size_hint);
if (typeid_cast<const ColumnInt8 *>(data.get())) return filterNumber<Int8>(filt, result_size_hint);
if (typeid_cast<const ColumnInt16 *>(data.get())) return filterNumber<Int16>(filt, result_size_hint);
if (typeid_cast<const ColumnInt32 *>(data.get())) return filterNumber<Int32>(filt, result_size_hint);
if (typeid_cast<const ColumnInt64 *>(data.get())) return filterNumber<Int64>(filt, result_size_hint);
if (typeid_cast<const ColumnFloat32 *>(data.get())) return filterNumber<Float32>(filt, result_size_hint);
if (typeid_cast<const ColumnFloat64 *>(data.get())) return filterNumber<Float64>(filt, result_size_hint);
if (typeid_cast<const ColumnString *>(data.get())) return filterString(filt, result_size_hint);
if (typeid_cast<const ColumnTuple *>(data.get())) return filterTuple(filt, result_size_hint);
if (typeid_cast<const ColumnNullable *>(data.get())) return filterNullable(filt, result_size_hint);
if (typeid_cast<const ColumnUInt8 *>(data.get()))
return filterNumber<UInt8>(filt, result_size_hint);
if (typeid_cast<const ColumnUInt16 *>(data.get()))
return filterNumber<UInt16>(filt, result_size_hint);
if (typeid_cast<const ColumnUInt32 *>(data.get()))
return filterNumber<UInt32>(filt, result_size_hint);
if (typeid_cast<const ColumnUInt64 *>(data.get()))
return filterNumber<UInt64>(filt, result_size_hint);
if (typeid_cast<const ColumnUInt128 *>(data.get()))
return filterNumber<UInt128>(filt, result_size_hint);
if (typeid_cast<const ColumnUInt256 *>(data.get()))
return filterNumber<UInt256>(filt, result_size_hint);
if (typeid_cast<const ColumnInt8 *>(data.get()))
return filterNumber<Int8>(filt, result_size_hint);
if (typeid_cast<const ColumnInt16 *>(data.get()))
return filterNumber<Int16>(filt, result_size_hint);
if (typeid_cast<const ColumnInt32 *>(data.get()))
return filterNumber<Int32>(filt, result_size_hint);
if (typeid_cast<const ColumnInt64 *>(data.get()))
return filterNumber<Int64>(filt, result_size_hint);
if (typeid_cast<const ColumnInt128 *>(data.get()))
return filterNumber<Int128>(filt, result_size_hint);
if (typeid_cast<const ColumnInt256 *>(data.get()))
return filterNumber<Int256>(filt, result_size_hint);
if (typeid_cast<const ColumnFloat32 *>(data.get()))
return filterNumber<Float32>(filt, result_size_hint);
if (typeid_cast<const ColumnFloat64 *>(data.get()))
return filterNumber<Float64>(filt, result_size_hint);
if (typeid_cast<const ColumnDecimal<Decimal32> *>(data.get()))
return filterNumber<Decimal32>(filt, result_size_hint);
if (typeid_cast<const ColumnDecimal<Decimal64> *>(data.get()))
return filterNumber<Decimal64>(filt, result_size_hint);
if (typeid_cast<const ColumnDecimal<Decimal128> *>(data.get()))
return filterNumber<Decimal128>(filt, result_size_hint);
if (typeid_cast<const ColumnDecimal<Decimal256> *>(data.get()))
return filterNumber<Decimal256>(filt, result_size_hint);
if (typeid_cast<const ColumnString *>(data.get()))
return filterString(filt, result_size_hint);
if (typeid_cast<const ColumnTuple *>(data.get()))
return filterTuple(filt, result_size_hint);
if (typeid_cast<const ColumnNullable *>(data.get()))
return filterNullable(filt, result_size_hint);
return filterGeneric(filt, result_size_hint);
}
@ -597,15 +627,17 @@ void ColumnArray::expand(const IColumn::Filter & mask, bool inverted)
template <typename T>
ColumnPtr ColumnArray::filterNumber(const Filter & filt, ssize_t result_size_hint) const
{
using ColVecType = ColumnVectorOrDecimal<T>;
if (getOffsets().empty())
return ColumnArray::create(data);
auto res = ColumnArray::create(data->cloneEmpty());
auto & res_elems = assert_cast<ColumnVector<T> &>(res->getData()).getData();
auto & res_elems = assert_cast<ColVecType &>(res->getData()).getData();
Offsets & res_offsets = res->getOffsets();
filterArraysImpl<T>(assert_cast<const ColumnVector<T> &>(*data).getData(), getOffsets(), res_elems, res_offsets, filt, result_size_hint);
filterArraysImpl<T>(assert_cast<const ColVecType &>(*data).getData(), getOffsets(), res_elems, res_offsets, filt, result_size_hint);
return res;
}
@ -932,20 +964,50 @@ ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
if (replicate_offsets.empty())
return cloneEmpty();
if (typeid_cast<const ColumnUInt8 *>(data.get())) return replicateNumber<UInt8>(replicate_offsets);
if (typeid_cast<const ColumnUInt16 *>(data.get())) return replicateNumber<UInt16>(replicate_offsets);
if (typeid_cast<const ColumnUInt32 *>(data.get())) return replicateNumber<UInt32>(replicate_offsets);
if (typeid_cast<const ColumnUInt64 *>(data.get())) return replicateNumber<UInt64>(replicate_offsets);
if (typeid_cast<const ColumnInt8 *>(data.get())) return replicateNumber<Int8>(replicate_offsets);
if (typeid_cast<const ColumnInt16 *>(data.get())) return replicateNumber<Int16>(replicate_offsets);
if (typeid_cast<const ColumnInt32 *>(data.get())) return replicateNumber<Int32>(replicate_offsets);
if (typeid_cast<const ColumnInt64 *>(data.get())) return replicateNumber<Int64>(replicate_offsets);
if (typeid_cast<const ColumnFloat32 *>(data.get())) return replicateNumber<Float32>(replicate_offsets);
if (typeid_cast<const ColumnFloat64 *>(data.get())) return replicateNumber<Float64>(replicate_offsets);
if (typeid_cast<const ColumnString *>(data.get())) return replicateString(replicate_offsets);
if (typeid_cast<const ColumnConst *>(data.get())) return replicateConst(replicate_offsets);
if (typeid_cast<const ColumnNullable *>(data.get())) return replicateNullable(replicate_offsets);
if (typeid_cast<const ColumnTuple *>(data.get())) return replicateTuple(replicate_offsets);
if (typeid_cast<const ColumnUInt8 *>(data.get()))
return replicateNumber<UInt8>(replicate_offsets);
if (typeid_cast<const ColumnUInt16 *>(data.get()))
return replicateNumber<UInt16>(replicate_offsets);
if (typeid_cast<const ColumnUInt32 *>(data.get()))
return replicateNumber<UInt32>(replicate_offsets);
if (typeid_cast<const ColumnUInt64 *>(data.get()))
return replicateNumber<UInt64>(replicate_offsets);
if (typeid_cast<const ColumnUInt128 *>(data.get()))
return replicateNumber<UInt128>(replicate_offsets);
if (typeid_cast<const ColumnUInt256 *>(data.get()))
return replicateNumber<UInt256>(replicate_offsets);
if (typeid_cast<const ColumnInt8 *>(data.get()))
return replicateNumber<Int8>(replicate_offsets);
if (typeid_cast<const ColumnInt16 *>(data.get()))
return replicateNumber<Int16>(replicate_offsets);
if (typeid_cast<const ColumnInt32 *>(data.get()))
return replicateNumber<Int32>(replicate_offsets);
if (typeid_cast<const ColumnInt64 *>(data.get()))
return replicateNumber<Int64>(replicate_offsets);
if (typeid_cast<const ColumnInt128 *>(data.get()))
return replicateNumber<Int128>(replicate_offsets);
if (typeid_cast<const ColumnInt256 *>(data.get()))
return replicateNumber<Int256>(replicate_offsets);
if (typeid_cast<const ColumnFloat32 *>(data.get()))
return replicateNumber<Float32>(replicate_offsets);
if (typeid_cast<const ColumnFloat64 *>(data.get()))
return replicateNumber<Float64>(replicate_offsets);
if (typeid_cast<const ColumnDecimal<Decimal32> *>(data.get()))
return replicateNumber<Decimal32>(replicate_offsets);
if (typeid_cast<const ColumnDecimal<Decimal64> *>(data.get()))
return replicateNumber<Decimal64>(replicate_offsets);
if (typeid_cast<const ColumnDecimal<Decimal128> *>(data.get()))
return replicateNumber<Decimal128>(replicate_offsets);
if (typeid_cast<const ColumnDecimal<Decimal256> *>(data.get()))
return replicateNumber<Decimal256>(replicate_offsets);
if (typeid_cast<const ColumnString *>(data.get()))
return replicateString(replicate_offsets);
if (typeid_cast<const ColumnConst *>(data.get()))
return replicateConst(replicate_offsets);
if (typeid_cast<const ColumnNullable *>(data.get()))
return replicateNullable(replicate_offsets);
if (typeid_cast<const ColumnTuple *>(data.get()))
return replicateTuple(replicate_offsets);
return replicateGeneric(replicate_offsets);
}
@ -953,6 +1015,8 @@ ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
template <typename T>
ColumnPtr ColumnArray::replicateNumber(const Offsets & replicate_offsets) const
{
using ColVecType = ColumnVectorOrDecimal<T>;
size_t col_size = size();
if (col_size != replicate_offsets.size())
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of offsets doesn't match size of column.");
@ -964,10 +1028,10 @@ ColumnPtr ColumnArray::replicateNumber(const Offsets & replicate_offsets) const
ColumnArray & res_arr = typeid_cast<ColumnArray &>(*res);
const typename ColumnVector<T>::Container & src_data = typeid_cast<const ColumnVector<T> &>(*data).getData();
const typename ColVecType::Container & src_data = typeid_cast<const ColVecType &>(*data).getData();
const Offsets & src_offsets = getOffsets();
typename ColumnVector<T>::Container & res_data = typeid_cast<ColumnVector<T> &>(res_arr.getData()).getData();
typename ColVecType::Container & res_data = typeid_cast<ColVecType &>(res_arr.getData()).getData();
Offsets & res_offsets = res_arr.getOffsets();
res_data.reserve(data->size() / col_size * replicate_offsets.back());

View File

@ -59,10 +59,7 @@ public:
void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
void insertData(const char * src, size_t /*length*/) override;
void insertDefault() override { data.push_back(T()); }
virtual void insertManyDefaults(size_t length) override
{
data.resize_fill(data.size() + length);
}
void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); }
void insert(const Field & x) override { data.push_back(x.get<T>()); }
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;

View File

@ -320,12 +320,20 @@ INSTANTIATE(UInt8)
INSTANTIATE(UInt16)
INSTANTIATE(UInt32)
INSTANTIATE(UInt64)
INSTANTIATE(UInt128)
INSTANTIATE(UInt256)
INSTANTIATE(Int8)
INSTANTIATE(Int16)
INSTANTIATE(Int32)
INSTANTIATE(Int64)
INSTANTIATE(Int128)
INSTANTIATE(Int256)
INSTANTIATE(Float32)
INSTANTIATE(Float64)
INSTANTIATE(Decimal32)
INSTANTIATE(Decimal64)
INSTANTIATE(Decimal128)
INSTANTIATE(Decimal256)
#undef INSTANTIATE

View File

@ -82,7 +82,12 @@ FilterDescription::FilterDescription(const IColumn & column_)
const auto size = res.size();
assert(size == null_map.size());
for (size_t i = 0; i < size; ++i)
res[i] = res[i] && !null_map[i];
{
auto has_val = static_cast<UInt8>(!!res[i]);
auto not_null = static_cast<UInt8>(!null_map[i]);
/// Instead of the logical AND operator (&&), the bitwise one (&) is used so the loop can be auto-vectorized.
res[i] = has_val & not_null;
}
data = &res;
data_holder = std::move(mutable_holder);
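
A note on the comment in this hunk: `&&` short-circuits, so each element implies a conditional branch, while `&` on the two UInt8 flags is straight-line arithmetic the compiler can turn into SIMD code. A minimal standalone sketch of the same branch-free combination (names are illustrative, not the FilterDescription API):

#include <cstdint>
#include <vector>

/// Combine a filter column with a null map: keep a row only if the filter byte is
/// non-zero AND the value is not NULL. The bitwise & keeps the loop branch-free,
/// which lets compilers emit SIMD instructions for it.
void combineFilterWithNullMap(std::vector<uint8_t> & res, const std::vector<uint8_t> & null_map)
{
    const size_t size = res.size();
    for (size_t i = 0; i < size; ++i)
    {
        uint8_t has_val = static_cast<uint8_t>(!!res[i]);
        uint8_t not_null = static_cast<uint8_t>(!null_map[i]);
        res[i] = has_val & not_null;   /// '&' instead of '&&': no short-circuit branch
    }
}

int main()
{
    std::vector<uint8_t> filter{1, 0, 1, 1};
    std::vector<uint8_t> null_map{0, 0, 1, 0};
    combineFilterWithNullMap(filter, null_map);   /// filter becomes {1, 0, 0, 1}
}
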

View File

@ -1,6 +1,6 @@
#pragma once
#include <mutex>
#include <atomic>
#include <memory>
#include <base/defines.h>
@ -32,26 +32,25 @@ public:
MultiVersion() = default;
explicit MultiVersion(std::unique_ptr<const T> && value)
: current_version(std::move(value))
{
set(std::move(value));
}
/// Obtain current version for read-only usage. Returns shared_ptr, that manages lifetime of version.
Version get() const
{
/// NOTE: is it possible to lock-free replace of shared_ptr?
std::lock_guard lock(mutex);
return current_version;
return std::atomic_load(&current_version);
}
/// TODO: replace atomic_load/store() on shared_ptr (which is deprecated as of C++20) by C++20 std::atomic<std::shared_ptr>.
/// Clang 15 currently does not support it.
/// Update an object with new version.
void set(std::unique_ptr<const T> && value)
{
std::lock_guard lock(mutex);
current_version = std::move(value);
std::atomic_store(&current_version, Version{std::move(value)});
}
private:
Version current_version TSA_GUARDED_BY(mutex);
mutable std::mutex mutex;
Version current_version;
};
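
The MultiVersion change above swaps a mutex-protected shared_ptr for the free-function atomic operations on shared_ptr. A minimal standalone sketch of that pattern, with a hypothetical Config payload; as the TODO notes, std::atomic_load/std::atomic_store on shared_ptr are deprecated in C++20 but still available until std::atomic<std::shared_ptr> can be used:

#include <atomic>
#include <iostream>
#include <memory>
#include <string>

struct Config { std::string value; };

std::shared_ptr<const Config> current_version;

/// Readers take a snapshot; it stays valid even if a writer swaps in a new version
/// concurrently, because the shared_ptr keeps the old object alive.
std::shared_ptr<const Config> get() { return std::atomic_load(&current_version); }

/// Writers publish a new immutable version atomically.
void set(std::unique_ptr<const Config> && value)
{
    std::atomic_store(&current_version, std::shared_ptr<const Config>(std::move(value)));
}

int main()
{
    set(std::make_unique<const Config>(Config{"v1"}));
    auto snapshot = get();
    set(std::make_unique<const Config>(Config{"v2"}));
    std::cout << snapshot->value << " then " << get()->value << '\n';   /// v1 then v2
}
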

View File

@ -29,6 +29,7 @@
#cmakedefine01 USE_FASTOPS
#cmakedefine01 USE_NLP
#cmakedefine01 USE_VECTORSCAN
#cmakedefine01 USE_LIBURING
#cmakedefine01 USE_AVRO
#cmakedefine01 USE_CAPNP
#cmakedefine01 USE_PARQUET

View File

@ -11,10 +11,14 @@
// This depends on BoringSSL-specific API, notably <openssl/aead.h>.
#if USE_SSL
#include <openssl/digest.h>
#include <openssl/err.h>
#include <boost/algorithm/hex.hpp>
#include <openssl/aead.h>
# include <openssl/err.h>
# include <boost/algorithm/hex.hpp>
# if USE_BORINGSSL
# include <openssl/digest.h>
# include <openssl/aead.h>
# else
# include <openssl/evp.h>
# endif
#endif
// Common part for both parts (with SSL and without)
@ -87,23 +91,6 @@ constexpr size_t nonce_max_size = 13; /// Nonce size and one byte to show i
constexpr size_t actual_nonce_size = 12; /// Nonce actual size
const String empty_nonce = {"\0\0\0\0\0\0\0\0\0\0\0\0", actual_nonce_size};
/// Get encryption/decryption algorithms.
auto getMethod(EncryptionMethod Method)
{
if (Method == AES_128_GCM_SIV)
{
return EVP_aead_aes_128_gcm_siv;
}
else if (Method == AES_256_GCM_SIV)
{
return EVP_aead_aes_256_gcm_siv;
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
}
}
/// Find out key size for each algorithm
UInt64 methodKeySize(EncryptionMethod Method)
{
@ -128,6 +115,24 @@ std::string lastErrorString()
return std::string(buffer.data());
}
#if USE_BORINGSSL
/// Get encryption/decryption algorithms.
auto getMethod(EncryptionMethod Method)
{
if (Method == AES_128_GCM_SIV)
{
return EVP_aead_aes_128_gcm_siv;
}
else if (Method == AES_256_GCM_SIV)
{
return EVP_aead_aes_256_gcm_siv;
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
}
}
/// Encrypt plaintext with particular algorithm and put result into ciphertext_and_tag.
/// This function gets key and nonce and encrypts text with their help.
/// If something went wrong (can't init context or can't encrypt data) it throws an exception.
@ -186,6 +191,160 @@ size_t decrypt(std::string_view ciphertext, char * plaintext, EncryptionMethod method, const String & key, const String & nonce)
return out_len;
}
#else
/// Get encryption/decryption algorithms.
auto getMethod(EncryptionMethod Method)
{
if (Method == AES_128_GCM_SIV)
{
return EVP_aes_128_gcm;
}
else if (Method == AES_256_GCM_SIV)
{
return EVP_aes_256_gcm;
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
}
}
/// Encrypt plaintext with particular algorithm and put result into ciphertext_and_tag.
/// This function gets key and nonce and encrypts text with their help.
/// If something went wrong (can't init context or can't encrypt data) it throws an exception.
/// It returns length of encrypted text.
size_t encrypt(std::string_view plaintext, char * ciphertext_and_tag, EncryptionMethod method, const String & key, const String & nonce)
{
int out_len;
int ciphertext_len;
EVP_CIPHER_CTX *encrypt_ctx;
if (!(encrypt_ctx = EVP_CIPHER_CTX_new()))
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
try
{
const int ok_cryptinit = EVP_EncryptInit_ex(encrypt_ctx,
getMethod(method)(),
nullptr, nullptr, nullptr);
if (!ok_cryptinit)
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
const int ok_cipherctrl = EVP_CIPHER_CTX_ctrl(encrypt_ctx,
EVP_CTRL_GCM_SET_IVLEN,
static_cast<int32_t>(nonce.size()),
nullptr);
if (!ok_cipherctrl)
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
const int ok_nonceinit = EVP_EncryptInit_ex(encrypt_ctx, nullptr, nullptr,
reinterpret_cast<const uint8_t*>(key.data()),
reinterpret_cast<const uint8_t *>(nonce.data()));
if (!ok_nonceinit)
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
const int ok_encryptupdate = EVP_EncryptUpdate(encrypt_ctx,
reinterpret_cast<uint8_t *>(ciphertext_and_tag),
&out_len,
reinterpret_cast<const uint8_t *>(plaintext.data()),
static_cast<int32_t>(plaintext.size()));
ciphertext_len = out_len;
if (!ok_encryptupdate)
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
const int ok_encryptfinal = EVP_EncryptFinal_ex(encrypt_ctx,
reinterpret_cast<uint8_t *>(ciphertext_and_tag) + out_len,
reinterpret_cast<int32_t *>(&out_len));
ciphertext_len += out_len;
if (!ok_encryptfinal)
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
/* Get the tag */
const int ok_tag = EVP_CIPHER_CTX_ctrl(encrypt_ctx,
EVP_CTRL_GCM_GET_TAG,
tag_size,
reinterpret_cast<uint8_t *>(ciphertext_and_tag) + plaintext.size());
if (!ok_tag)
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
}
catch (...)
{
EVP_CIPHER_CTX_free(encrypt_ctx);
throw;
}
EVP_CIPHER_CTX_free(encrypt_ctx);
return ciphertext_len + tag_size;
}
/// Decrypt ciphertext (which ends with the authentication tag) with particular algorithm and put result into plaintext.
/// This function gets key and nonce and decrypts text with their help.
/// If something went wrong (can't init context or can't decrypt data) it throws an exception.
/// It returns length of decrypted text.
size_t decrypt(std::string_view ciphertext, char * plaintext, EncryptionMethod method, const String & key, const String & nonce)
{
int out_len;
int plaintext_len;
EVP_CIPHER_CTX *decrypt_ctx;
if (!(decrypt_ctx = EVP_CIPHER_CTX_new()))
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
try
{
const int ok_cryptinit = EVP_DecryptInit_ex(decrypt_ctx,
getMethod(method)(),
nullptr, nullptr, nullptr);
if (!ok_cryptinit)
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
const int ok_cipherctrl = EVP_CIPHER_CTX_ctrl(decrypt_ctx,
EVP_CTRL_GCM_SET_IVLEN,
static_cast<int32_t>(nonce.size()), nullptr);
if (!ok_cipherctrl)
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
const int ok_nonceinit = EVP_DecryptInit_ex(decrypt_ctx, nullptr, nullptr,
reinterpret_cast<const uint8_t*>(key.data()),
reinterpret_cast<const uint8_t *>(nonce.data()));
if (!ok_nonceinit)
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
const int ok_decryptudpate = EVP_DecryptUpdate(decrypt_ctx,
reinterpret_cast<uint8_t *>(plaintext),
reinterpret_cast<int32_t *>(&out_len),
reinterpret_cast<const uint8_t *>(ciphertext.data()),
static_cast<int32_t>(ciphertext.size()) - tag_size);
plaintext_len = out_len;
if (!ok_decryptudpate)
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
const int ok_tag = EVP_CIPHER_CTX_ctrl(decrypt_ctx,
EVP_CTRL_GCM_SET_TAG,
tag_size,
reinterpret_cast<uint8_t *>(const_cast<char *>(ciphertext.data())) + ciphertext.size() - tag_size);
if (!ok_tag)
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
const int ok_decryptfinal = EVP_DecryptFinal_ex(decrypt_ctx,
reinterpret_cast<uint8_t *>(plaintext) + out_len,
reinterpret_cast<int32_t *>(&out_len));
if (!ok_decryptfinal)
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
}
catch (...)
{
EVP_CIPHER_CTX_free(decrypt_ctx);
throw;
}
EVP_CIPHER_CTX_free(decrypt_ctx);
return plaintext_len + out_len;
}
#endif
/// Register codec in factory
void registerEncryptionCodec(CompressionCodecFactory & factory, EncryptionMethod Method)

View File

@ -178,9 +178,7 @@ void registerCodecDelta(CompressionCodecFactory & factory);
void registerCodecT64(CompressionCodecFactory & factory);
void registerCodecDoubleDelta(CompressionCodecFactory & factory);
void registerCodecGorilla(CompressionCodecFactory & factory);
#if USE_BORINGSSL
void registerCodecEncrypted(CompressionCodecFactory & factory);
#endif
void registerCodecFPC(CompressionCodecFactory & factory);
#endif
@ -197,9 +195,7 @@ CompressionCodecFactory::CompressionCodecFactory()
registerCodecT64(*this);
registerCodecDoubleDelta(*this);
registerCodecGorilla(*this);
#if USE_BORINGSSL
registerCodecEncrypted(*this);
#endif
registerCodecFPC(*this);
#ifdef ENABLE_QPL_COMPRESSION
registerCodecDeflateQpl(*this);

View File

@ -5,10 +5,17 @@
#include <base/range.h>
#include <boost/blank.hpp>
#include <unordered_map>
#include <boost/program_options/options_description.hpp>
namespace boost::program_options
{
class options_description;
}
namespace DB
{
class ReadBuffer;
class WriteBuffer;
@ -19,7 +26,6 @@ enum class SettingsWriteFormat
DEFAULT = STRINGS_WITH_FLAGS,
};
/** Template class to define collections of settings.
* Example of usage:
*
@ -119,6 +125,18 @@ public:
std::conditional_t<Traits::allow_custom_settings, const CustomSettingMap::mapped_type*, boost::blank> custom_setting;
};
/// Adds program options to set the settings from a command line.
/// (Don't forget to call notify() on the `variables_map` after parsing it!)
void addProgramOptions(boost::program_options::options_description & options);
/// Adds program options, as multitokens, to set the settings from a command line.
/// Allows to set one setting multiple times, the last value will be used.
/// (Don't forget to call notify() on the `variables_map` after parsing it!)
void addProgramOptionsAsMultitokens(boost::program_options::options_description & options);
void addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field);
void addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field);
enum SkipFlags
{
SKIP_NONE = 0,
@ -518,6 +536,38 @@ String BaseSettings<TTraits>::toString() const
return res;
}
template <typename TTraits>
void BaseSettings<TTraits>::addProgramOptions(boost::program_options::options_description & options)
{
for (const auto & field : all())
addProgramOption(options, field);
}
template <typename TTraits>
void BaseSettings<TTraits>::addProgramOptionsAsMultitokens(boost::program_options::options_description & options)
{
for (const auto & field : all())
addProgramOptionAsMultitoken(options, field);
}
template <typename TTraits>
void BaseSettings<TTraits>::addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field)
{
const std::string_view name = field.getName();
auto on_program_option = boost::function1<void, const std::string &>([this, name](const std::string & value) { set(name, value); });
options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
name.data(), boost::program_options::value<std::string>()->composing()->notifier(on_program_option), field.getDescription())));
}
template <typename TTraits>
void BaseSettings<TTraits>::addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field)
{
const std::string_view name = field.getName();
auto on_program_option = boost::function1<void, const Strings &>([this, name](const Strings & values) { set(name, values.back()); });
options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
name.data(), boost::program_options::value<Strings>()->multitoken()->composing()->notifier(on_program_option), field.getDescription())));
}
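
A standalone sketch of what addProgramOptionAsMultitoken sets up for one setting: a composing multitoken option whose notifier receives every occurrence and keeps only the last value, so repeating a setting on the command line is accepted and the final occurrence wins. The option name max_threads is only an illustration:

#include <boost/program_options.hpp>
#include <iostream>
#include <string>
#include <vector>

namespace po = boost::program_options;

int main(int argc, char ** argv)
{
    std::string max_threads;   /// stands in for set(name, values.back())

    po::options_description options("settings");
    options.add_options()(
        "max_threads",
        po::value<std::vector<std::string>>()->multitoken()->composing()->notifier(
            [&](const std::vector<std::string> & values) { max_threads = values.back(); }),
        "example setting registered as a multitoken option");

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, options), vm);
    po::notify(vm);   /// notifiers run here, after all occurrences were collected

    if (!max_threads.empty())
        std::cout << "max_threads = " << max_threads << '\n';
}

Running the sketch with --max_threads 4 --max_threads 8 prints max_threads = 8, which is the last-value-wins behavior the comment above promises for multitoken settings.
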
template <typename TTraits>
bool operator==(const BaseSettings<TTraits> & left, const BaseSettings<TTraits> & right)
{

View File

@ -6,7 +6,6 @@
#include <Columns/ColumnMap.h>
#include <Common/typeid_cast.h>
#include <cstring>
#include <boost/program_options/options_description.hpp>
namespace DB
{
@ -82,38 +81,6 @@ void Settings::dumpToMapColumn(IColumn * column, bool changed_only)
offsets.push_back(offsets.back() + size);
}
void Settings::addProgramOptions(boost::program_options::options_description & options)
{
for (const auto & field : all())
{
addProgramOption(options, field);
}
}
void Settings::addProgramOptionsAsMultitokens(boost::program_options::options_description & options)
{
for (const auto & field : all())
{
addProgramOptionAsMultitoken(options, field);
}
}
void Settings::addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field)
{
const std::string_view name = field.getName();
auto on_program_option = boost::function1<void, const std::string &>([this, name](const std::string & value) { set(name, value); });
options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
name.data(), boost::program_options::value<std::string>()->composing()->notifier(on_program_option), field.getDescription())));
}
void Settings::addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field)
{
const std::string_view name = field.getName();
auto on_program_option = boost::function1<void, const Strings &>([this, name](const Strings & values) { set(name, values.back()); });
options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
name.data(), boost::program_options::value<Strings>()->multitoken()->composing()->notifier(on_program_option), field.getDescription())));
}
void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path)
{
if (config.getBool("skip_check_for_incorrect_settings", false))

View File

@ -13,12 +13,6 @@ namespace Poco::Util
class AbstractConfiguration;
}
namespace boost::program_options
{
class options_description;
}
namespace DB
{
class IColumn;
@ -96,6 +90,7 @@ class IColumn;
M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \
M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \
M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \
M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \
M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \
@ -894,6 +889,7 @@ class IColumn;
M(Bool, input_format_bson_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format BSON.", 0) \
\
M(Bool, regexp_dict_allow_other_sources, false, "Allow regexp_tree dictionary to use sources other than yaml source.", 0) \
M(Bool, regexp_dict_allow_hyperscan, false, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
// End of FORMAT_FACTORY_SETTINGS
// Please add settings non-related to formats into the COMMON_SETTINGS above.
@ -926,25 +922,12 @@ struct Settings : public BaseSettings<SettingsTraits>, public IHints<2, Settings
/// Dumps profile events to column of type Map(String, String)
void dumpToMapColumn(IColumn * column, bool changed_only = true);
/// Adds program options to set the settings from a command line.
/// (Don't forget to call notify() on the `variables_map` after parsing it!)
void addProgramOptions(boost::program_options::options_description & options);
/// Adds program options as to set the settings from a command line.
/// Allows to set one setting multiple times, the last value will be used.
/// (Don't forget to call notify() on the `variables_map` after parsing it!)
void addProgramOptionsAsMultitokens(boost::program_options::options_description & options);
/// Check that there is no user-level settings at the top level in config.
/// This is a common source of mistake (user don't know where to write user-level setting).
static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path);
std::vector<String> getAllRegisteredNames() const override;
void addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field);
void addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field);
void set(std::string_view name, const Field & value) override;
void setDefaultValue(const String & name) { resetToDefault(name); }

View File

@ -1,3 +1,4 @@
#include <exception>
#include <optional>
#include <string_view>
@ -88,8 +89,15 @@ struct RegExpTreeDictionary::RegexTreeNode
UInt64 parent_id;
std::string regex;
re2_st::RE2 searcher;
RegexTreeNode(UInt64 id_, UInt64 parent_id_, const String & regex_, const re2_st::RE2::Options & regexp_options):
id(id_), parent_id(parent_id_), regex(regex_), searcher(regex_, regexp_options) {}
bool match(const char * haystack, size_t size) const
{
return searcher.Match(haystack, 0, size, re2_st::RE2::Anchor::UNANCHORED, nullptr, 0);
}
struct AttributeValue
{
Field field;
@ -118,7 +126,7 @@ std::vector<StringPiece> createStringPieces(const String & value, int num_captur
}
int ref_num = value[i+1]-'0';
if (ref_num >= num_captures)
LOG_DEBUG(logger,
LOG_TRACE(logger,
"Reference Id {} in set string is invalid, the regexp {} only has {} capturing groups",
ref_num, regex, num_captures-1);
result.push_back(StringPiece(ref_num));
@ -137,13 +145,60 @@ std::vector<StringPiece> createStringPieces(const String & value, int num_captur
void RegExpTreeDictionary::calculateBytesAllocated()
{
for (const String & regex : regexps)
for (const String & regex : simple_regexps)
bytes_allocated += regex.size();
bytes_allocated += sizeof(UInt64) * regexp_ids.size();
bytes_allocated += (sizeof(RegexTreeNode) + sizeof(UInt64)) * regex_nodes.size();
bytes_allocated += 2 * sizeof(UInt64) * topology_order.size();
}
namespace
{
/// Hyperscan is not good at processing regexes containing bounded repeats like {0, 200}:
/// compilation becomes slow or fails outright. So we detect these heavy regular expressions and
/// process them with re2 instead.
struct RegexChecker
{
re2_st::RE2 searcher;
RegexChecker() : searcher(R"(\{([\d]+),([\d]+)\})") {}
static bool isFigureLargerThanFifty(const String & str)
try
{
auto number = std::stoi(str);
return number > 50;
}
catch (std::exception &)
{
return false;
}
[[maybe_unused]]
bool isSimpleRegex(const String & regex) const
{
re2_st::StringPiece haystack(regex.data(), regex.size());
re2_st::StringPiece matches[10];
size_t start_pos = 0;
while (start_pos < regex.size())
{
if (searcher.Match(haystack, start_pos, regex.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 10))
{
const auto & match = matches[0];
start_pos += match.length();
const auto & match1 = matches[1];
const auto & match2 = matches[2];
if (isFigureLargerThanFifty(match1.ToString()) || isFigureLargerThanFifty(match2.ToString()))
return false;
}
else
break;
}
return true;
}
};
}
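
A simplified stand-in for RegexChecker above, using std::regex instead of re2_st so it is self-contained: a bounded repeat {m,n} with either bound above 50 marks the pattern as too heavy for vectorscan, so it would fall back to RE2:

#include <iostream>
#include <regex>
#include <string>

/// Returns false for patterns containing a bounded quantifier {m,n} with a bound above 50,
/// mirroring RegexChecker::isSimpleRegex above (simplified: std::regex instead of re2_st).
bool isSimpleRegex(const std::string & pattern)
{
    static const std::regex quantifier(R"(\{(\d+),(\d+)\})");
    for (auto it = std::sregex_iterator(pattern.begin(), pattern.end(), quantifier); it != std::sregex_iterator(); ++it)
    {
        if (std::stoi((*it)[1].str()) > 50 || std::stoi((*it)[2].str()) > 50)
            return false;
    }
    return true;
}

int main()
{
    std::cout << isSimpleRegex("ClickHouse/[0-9]{1,3}") << '\n';   /// 1: simple enough for vectorscan
    std::cout << isSimpleRegex("a.{0,200}b") << '\n';              /// 0: routed to RE2
}
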
void RegExpTreeDictionary::initRegexNodes(Block & block)
{
auto id_column = block.getByName(kId).column;
@ -152,6 +207,8 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
auto keys_column = block.getByName(kKeys).column;
auto values_column = block.getByName(kValues).column;
RegexChecker checker;
size_t size = block.rows();
for (size_t i = 0; i < size; i++)
{
@ -165,12 +222,10 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
if (id == 0)
throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "There are invalid id {}", id);
regexps.push_back(regex);
regexp_ids.push_back(id);
re2_st::RE2::Options regexp_options;
regexp_options.set_log_errors(false);
RegexTreeNodePtr node = std::make_unique<RegexTreeNode>(id, parent_id, regex, regexp_options);
RegexTreeNodePtr node = std::make_shared<RegexTreeNode>(id, parent_id, regex, regexp_options);
int num_captures = std::min(node->searcher.NumberOfCapturingGroups() + 1, 10);
@ -196,7 +251,16 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
}
}
}
regex_nodes.emplace(id, std::move(node));
regex_nodes.emplace(id, node);
#if USE_VECTORSCAN
if (use_vectorscan && checker.isSimpleRegex(regex))
{
simple_regexps.push_back(regex);
regexp_ids.push_back(id);
}
else
#endif
complex_regexp_nodes.push_back(node);
}
}
@ -226,7 +290,7 @@ void RegExpTreeDictionary::initTopologyOrder(UInt64 node_idx, std::set<UInt64> &
visited.insert(node_idx);
for (UInt64 child_idx : regex_nodes[node_idx]->children)
if (visited.contains(child_idx))
throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "Invalid Regex tree");
throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "Invalid Regex tree. The input tree is cyclical");
else
initTopologyOrder(child_idx, visited, topology_id);
topology_order[node_idx] = topology_id++;
@ -245,12 +309,18 @@ void RegExpTreeDictionary::loadData()
initRegexNodes(block);
}
initGraph();
if (regexps.empty())
if (simple_regexps.empty() && complex_regexp_nodes.empty())
throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "There are no available regular expression. Please check your config");
LOG_INFO(logger, "There are {} simple regexps and {} complex regexps", simple_regexps.size(), complex_regexp_nodes.size());
/// If none of the regexps can be handled by hyperscan, turn this flag off to avoid exceptions.
if (simple_regexps.empty())
use_vectorscan = false;
if (!use_vectorscan)
return;
#if USE_VECTORSCAN
try
{
std::vector<std::string_view> regexps_views(regexps.begin(), regexps.end());
std::vector<std::string_view> regexps_views(simple_regexps.begin(), simple_regexps.end());
hyperscan_regex = MultiRegexps::getOrSet<true, false>(regexps_views, std::nullopt);
hyperscan_regex->get();
}
@ -258,7 +328,6 @@ void RegExpTreeDictionary::loadData()
{
/// Some compile errors will be thrown as LOGICAL ERROR and cause a crash, e.g. an empty expression or expressions that are too large.
/// We catch the error here and rethrow again.
/// TODO: fallback to other engine, like re2, when exceptions occur.
throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "Error occurs when compiling regular expressions, reason: {}", e.message());
}
#endif
@ -270,8 +339,17 @@ void RegExpTreeDictionary::loadData()
}
RegExpTreeDictionary::RegExpTreeDictionary(
const StorageID & id_, const DictionaryStructure & structure_, DictionarySourcePtr source_ptr_, Configuration configuration_)
: IDictionary(id_), structure(structure_), source_ptr(source_ptr_), configuration(configuration_), logger(&Poco::Logger::get("RegExpTreeDictionary"))
const StorageID & id_,
const DictionaryStructure & structure_,
DictionarySourcePtr source_ptr_,
Configuration configuration_,
bool use_vectorscan_)
: IDictionary(id_),
structure(structure_),
source_ptr(source_ptr_),
configuration(configuration_),
use_vectorscan(use_vectorscan_),
logger(&Poco::Logger::get("RegExpTreeDictionary"))
{
if (auto * ch_source = typeid_cast<ClickHouseDictionarySource *>(source_ptr.get()))
{
@ -289,12 +367,15 @@ RegExpTreeDictionary::RegExpTreeDictionary(
calculateBytesAllocated();
}
String processBackRefs(const String & data, const re2_st::RE2 & searcher, const std::vector<StringPiece> & pieces)
std::pair<String, bool> processBackRefs(const String & data, const re2_st::RE2 & searcher, const std::vector<StringPiece> & pieces)
{
re2_st::StringPiece haystack(data.data(), data.size());
re2_st::StringPiece matches[10];
String result;
searcher.Match(haystack, 0, data.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 10);
/// if the pattern is a single '$1' but fails to match, we would use the default value.
if (pieces.size() == 1 && pieces[0].ref_num >= 0 && pieces[0].ref_num < 10 && matches[pieces[0].ref_num].empty())
return std::make_pair(result, true);
for (const auto & item : pieces)
{
if (item.ref_num >= 0 && item.ref_num < 10)
@ -302,7 +383,7 @@ String processBackRefs(const String & data, const re2_st::RE2 & searcher, const
else
result += item.literal;
}
return result;
return {result, false};
}
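
A sketch of the rule described in the comment above: when the value template is a lone back-reference and the group captured nothing, the second member of the returned pair tells the caller to fall back to the attribute's default instead of using an empty string. Simplified here with std::regex and a single group; the real code uses re2_st and mixes literals with up to ten capture references:

#include <iostream>
#include <optional>
#include <regex>
#include <string>

/// Substitute a lone "$1" by the first capturing group of `pattern` matched against `data`.
/// Returns nullopt when the group captured nothing, i.e. "use the attribute's default value".
std::optional<std::string> substituteFirstGroup(const std::string & data, const std::regex & pattern)
{
    std::smatch matches;
    if (!std::regex_search(data, matches, pattern) || matches[1].str().empty())
        return std::nullopt;
    return matches[1].str();
}

int main()
{
    std::regex browser(R"(Firefox/(\d+))");
    std::cout << substituteFirstGroup("Mozilla Firefox/109.0", browser).value_or("default") << '\n';   /// 109
    std::cout << substituteFirstGroup("Some other agent", browser).value_or("default") << '\n';        /// default
}
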
// walk towards root and collect attributes.
@ -312,7 +393,9 @@ bool RegExpTreeDictionary::setAttributes(
std::unordered_map<String, Field> & attributes_to_set,
const String & data,
std::unordered_set<UInt64> & visited_nodes,
const std::unordered_map<String, const DictionaryAttribute &> & attributes) const
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
const std::unordered_map<String, ColumnPtr> & defaults,
size_t key_index) const
{
if (visited_nodes.contains(id))
@ -325,8 +408,14 @@ bool RegExpTreeDictionary::setAttributes(
continue;
if (value.containsBackRefs())
{
String updated_str = processBackRefs(data, regex_nodes.at(id)->searcher, value.pieces);
attributes_to_set[name] = parseStringToField(updated_str, attributes.at(name).type);
auto [updated_str, use_default] = processBackRefs(data, regex_nodes.at(id)->searcher, value.pieces);
if (use_default)
{
DefaultValueProvider default_value(attributes.at(name).null_value, defaults.at(name));
attributes_to_set[name] = default_value.getDefaultValue(key_index);
}
else
attributes_to_set[name] = parseStringToField(updated_str, attributes.at(name).type);
}
else
attributes_to_set[name] = value.field;
@ -334,18 +423,17 @@ bool RegExpTreeDictionary::setAttributes(
auto parent_id = regex_nodes.at(id)->parent_id;
if (parent_id > 0)
setAttributes(parent_id, attributes_to_set, data, visited_nodes, attributes);
setAttributes(parent_id, attributes_to_set, data, visited_nodes, attributes, defaults, key_index);
// if all the attributes have been set, the walk can be stopped.
/// if all the attributes have been set, the walk can be stopped.
return attributes_to_set.size() == attributes.size();
}
#if USE_VECTORSCAN
namespace
{
struct MatchContext
{
std::unordered_set<UInt64> matched_idx_set;
std::set<UInt64> matched_idx_set;
std::vector<std::pair<UInt64, UInt64>> matched_idx_sorted_list;
const std::vector<UInt64> & regexp_ids ;
@ -354,14 +442,23 @@ namespace
MatchContext(const std::vector<UInt64> & regexp_ids_, const std::unordered_map<UInt64, UInt64> & topology_order_)
: regexp_ids(regexp_ids_), topology_order(topology_order_) {}
void insert(unsigned int id)
[[maybe_unused]]
void insertIdx(unsigned int idx)
{
UInt64 idx = regexp_ids[id-1];
UInt64 topological_order = topology_order.at(idx);
matched_idx_set.emplace(idx);
matched_idx_sorted_list.push_back(std::make_pair(topological_order, idx));
UInt64 node_id = regexp_ids[idx-1];
UInt64 topological_order = topology_order.at(node_id);
matched_idx_set.emplace(node_id);
matched_idx_sorted_list.push_back(std::make_pair(topological_order, node_id));
}
void insertNodeID(UInt64 id)
{
UInt64 topological_order = topology_order.at(id);
matched_idx_set.emplace(id);
matched_idx_sorted_list.push_back(std::make_pair(topological_order, id));
}
/// Sort by topological order, which indicates the matching priorities.
void sort()
{
std::sort(matched_idx_sorted_list.begin(), matched_idx_sorted_list.end());
@ -373,24 +470,28 @@ namespace
}
};
}
#endif // USE_VECTORSCAN
std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::matchSearchAllIndices(
[[maybe_unused]] const ColumnString::Chars & keys_data,
[[maybe_unused]] const ColumnString::Offsets & keys_offsets,
[[maybe_unused]] const std::unordered_map<String, const DictionaryAttribute &> & attributes,
[[maybe_unused]] const std::unordered_map<String, ColumnPtr> & defaults) const
std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::match(
const ColumnString::Chars & keys_data,
const ColumnString::Offsets & keys_offsets,
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
const std::unordered_map<String, ColumnPtr> & defaults) const
{
#if USE_VECTORSCAN
hs_scratch_t * scratch = nullptr;
hs_error_t err = hs_clone_scratch(hyperscan_regex->get()->getScratch(), &scratch);
if (err != HS_SUCCESS)
if (use_vectorscan)
{
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not clone scratch space for hyperscan");
hs_error_t err = hs_clone_scratch(hyperscan_regex->get()->getScratch(), &scratch);
if (err != HS_SUCCESS)
{
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not clone scratch space for hyperscan");
}
}
MultiRegexps::ScratchPtr smart_scratch(scratch);
#endif
std::unordered_map<String, MutableColumnPtr> columns;
@ -402,16 +503,6 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::matchSearchAllIndice
columns[name] = std::move(col_ptr);
}
auto on_match = [](unsigned int id,
unsigned long long /* from */, // NOLINT
unsigned long long /* to */, // NOLINT
unsigned int /* flags */,
void * context) -> int
{
static_cast<MatchContext *>(context)->insert(id);
return 0;
};
UInt64 offset = 0;
for (size_t key_idx = 0; key_idx < keys_offsets.size(); ++key_idx)
{
@ -420,25 +511,46 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::matchSearchAllIndice
MatchContext match_result(regexp_ids, topology_order);
err = hs_scan(
hyperscan_regex->get()->getDB(),
reinterpret_cast<const char *>(keys_data.data()) + offset,
static_cast<unsigned>(length),
0,
smart_scratch.get(),
on_match,
&match_result);
#if USE_VECTORSCAN
if (use_vectorscan)
{
auto on_match = [](unsigned int id,
unsigned long long /* from */, // NOLINT
unsigned long long /* to */, // NOLINT
unsigned int /* flags */,
void * context) -> int
{
static_cast<MatchContext *>(context)->insertIdx(id);
return 0;
};
hs_error_t err = hs_scan(
hyperscan_regex->get()->getDB(),
reinterpret_cast<const char *>(keys_data.data()) + offset,
static_cast<unsigned>(length),
0,
smart_scratch.get(),
on_match,
&match_result);
if (err != HS_SUCCESS)
throw Exception(ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT, "Failed to scan data with vectorscan");
if (err != HS_SUCCESS)
throw Exception(ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT, "Failed to scan data with vectorscan");
}
#endif
for (const auto & node_ptr : complex_regexp_nodes)
{
if (node_ptr->match(reinterpret_cast<const char *>(keys_data.data()) + offset, length))
{
match_result.insertNodeID(node_ptr->id);
}
}
match_result.sort();
// Walk through the regex tree until all attributes are set;
/// Walk through the regex tree until all attributes are set;
std::unordered_map<String, Field> attributes_to_set;
std::unordered_set<UInt64> visited_nodes;
// check if it is a valid id
/// Some node matches but its parents cannot match. In this case we must regard this node as unmatched.
auto is_invalid = [&](UInt64 id)
{
while (id)
@ -459,7 +571,7 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::matchSearchAllIndice
continue;
if (visited_nodes.contains(id))
continue;
if (setAttributes(id, attributes_to_set, str, visited_nodes, attributes))
if (setAttributes(id, attributes_to_set, str, visited_nodes, attributes, defaults, key_idx))
break;
}
@ -468,12 +580,11 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::matchSearchAllIndice
if (attributes_to_set.contains(name))
continue;
/// TODO: default value might be a back-reference, that is useful in lib ua-core
DefaultValueProvider default_value(attr.null_value, defaults.at(name));
columns[name]->insert(default_value.getDefaultValue(key_idx));
}
// insert to columns
/// insert to columns
for (const auto & [name, value] : attributes_to_set)
columns[name]->insert(value);
@ -485,9 +596,6 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::matchSearchAllIndice
result.emplace(name, std::move(mutable_ptr));
return result;
#else
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Multi search all indices is not implemented when USE_VECTORSCAN is off");
#endif // USE_VECTORSCAN
}
Columns RegExpTreeDictionary::getColumns(
@ -516,7 +624,7 @@ Columns RegExpTreeDictionary::getColumns(
/// calculate matches
const ColumnString * key_column = typeid_cast<const ColumnString *>(key_columns[0].get());
const auto & columns_map = matchSearchAllIndices(
const auto & columns_map = match(
key_column->getChars(),
key_column->getOffsets(),
attributes,
@ -561,7 +669,7 @@ void registerDictionaryRegExpTree(DictionaryFactory & factory)
"regexp_tree dictionary doesn't accept sources other than yaml source. "
"To active it, please set regexp_dict_allow_other_sources=true");
return std::make_unique<RegExpTreeDictionary>(dict_id, dict_struct, std::move(source_ptr), configuration);
return std::make_unique<RegExpTreeDictionary>(dict_id, dict_struct, std::move(source_ptr), configuration, context->getSettings().regexp_dict_allow_hyperscan);
};
factory.registerLayout("regexp_tree", create_layout, true);

View File

@ -43,7 +43,11 @@ public:
const std::string name = "RegExpTree";
RegExpTreeDictionary(
const StorageID & id_, const DictionaryStructure & structure_, DictionarySourcePtr source_ptr_, Configuration configuration_);
const StorageID & id_,
const DictionaryStructure & structure_,
DictionarySourcePtr source_ptr_,
Configuration configuration_,
bool use_vectorscan_);
std::string getTypeName() const override { return name; }
@ -79,7 +83,7 @@ public:
std::shared_ptr<const IExternalLoadable> clone() const override
{
return std::make_shared<RegExpTreeDictionary>(getDictionaryID(), structure, source_ptr->clone(), configuration);
return std::make_shared<RegExpTreeDictionary>(getDictionaryID(), structure, source_ptr->clone(), configuration, use_vectorscan);
}
ColumnUInt8::Ptr hasKeys(const Columns &, const DataTypes &) const override
@ -122,11 +126,6 @@ private:
mutable std::atomic<size_t> query_count{0};
mutable std::atomic<size_t> found_count{0};
std::vector<std::string> regexps;
std::vector<UInt64> regexp_ids;
Poco::Logger * logger;
void calculateBytesAllocated();
void loadData();
@ -135,7 +134,7 @@ private:
void initTopologyOrder(UInt64 node_idx, std::set<UInt64> & visited, UInt64 & topology_id);
void initGraph();
std::unordered_map<String, ColumnPtr> matchSearchAllIndices(
std::unordered_map<String, ColumnPtr> match(
const ColumnString::Chars & keys_data,
const ColumnString::Offsets & keys_offsets,
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
@ -146,16 +145,26 @@ private:
std::unordered_map<String, Field> & attributes_to_set,
const String & data,
std::unordered_set<UInt64> & visited_nodes,
const std::unordered_map<String, const DictionaryAttribute &> & attributes) const;
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
const std::unordered_map<String, ColumnPtr> & defaults,
size_t key_index) const;
struct RegexTreeNode;
using RegexTreeNodePtr = std::unique_ptr<RegexTreeNode>;
using RegexTreeNodePtr = std::shared_ptr<RegexTreeNode>;
bool use_vectorscan;
std::vector<std::string> simple_regexps;
std::vector<UInt64> regexp_ids;
std::vector<RegexTreeNodePtr> complex_regexp_nodes;
std::map<UInt64, RegexTreeNodePtr> regex_nodes;
std::unordered_map<UInt64, UInt64> topology_order;
#if USE_VECTORSCAN
MultiRegexps::DeferredConstructedRegexpsPtr hyperscan_regex;
#endif
Poco::Logger * logger;
};
}

View File

@ -1,6 +1,7 @@
#if defined(OS_LINUX)
#include "IOUringReader.h"
#if USE_LIBURING
#include <base/errnoToString.h>
#include <Common/assert_cast.h>
#include <Common/Exception.h>

View File

@ -1,5 +1,8 @@
#pragma once
#if defined(OS_LINUX)
#include "config.h"
#if USE_LIBURING
#include <Common/ThreadPool.h>
#include <IO/AsynchronousReader.h>

View File

@ -7,7 +7,7 @@
#include <Disks/IO/ThreadPoolReader.h>
#include <IO/SynchronousReader.h>
#include <Common/ProfileEvents.h>
#include "config.h"
namespace ProfileEvents
{
@ -84,7 +84,7 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
}
else if (settings.local_fs_method == LocalFSReadMethod::io_uring)
{
#if defined(OS_LINUX)
#if USE_LIBURING
static std::shared_ptr<IOUringReader> reader = std::make_shared<IOUringReader>(512);
if (!reader->isSupported())
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system");

View File

@ -0,0 +1,41 @@
#include <string>
#include <vector>
#include <Common/logger_useful.h>
#include "config.h"
#if USE_AZURE_BLOB_STORAGE
#include <azure/storage/blobs.hpp>
#include <azure/storage/common/internal/xml_wrapper.hpp>
#include <azure/storage/blobs/blob_container_client.hpp>
#include <azure/storage/blobs/blob_options.hpp>
#include <gtest/gtest.h>
TEST(AzureXMLWrapper, TestLeak)
{
std::string str = "<hello>world</hello>";
Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length());
Azure::Storage::_internal::XmlReader reader2(std::move(reader));
Azure::Storage::_internal::XmlReader reader3 = std::move(reader2);
reader3.Read();
}
TEST(AzureBlobContainerClient, CurlMemoryLeak)
{
using Azure::Storage::Blobs::BlobContainerClient;
using Azure::Storage::Blobs::BlobClientOptions;
static constexpr auto unavailable_url = "http://unavailable:19999/bucket";
static constexpr auto container = "container";
BlobClientOptions options;
options.Retry.MaxRetries = 0;
auto client = std::make_unique<BlobContainerClient>(BlobContainerClient::CreateFromConnectionString(unavailable_url, container, options));
EXPECT_THROW({ client->ListBlobs(); }, Azure::Core::Http::TransportException);
}
#endif

View File

@ -1,25 +0,0 @@
#include <string>
#include <vector>
#include <Common/logger_useful.h>
#include "config.h"
#if USE_AZURE_BLOB_STORAGE
#include <azure/storage/blobs.hpp>
#include <azure/storage/common/internal/xml_wrapper.hpp>
#include <gtest/gtest.h>
TEST(AzureXMLWrapper, TestLeak)
{
std::string str = "<hello>world</hello>";
Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length());
Azure::Storage::_internal::XmlReader reader2(std::move(reader));
Azure::Storage::_internal::XmlReader reader3 = std::move(reader2);
reader3.Read();
}
#endif

View File

@ -982,7 +982,7 @@ struct JSONExtractTree
return false;
}
assert_cast<ColumnDecimal<DecimalType> &>(dest).insert(value);
assert_cast<ColumnDecimal<DecimalType> &>(dest).insertValue(value);
return true;
}

View File

@ -209,9 +209,13 @@ struct AggregationMethodOneNumber
// Insert the key from the hash table into columns.
static void insertKeyIntoColumns(const Key & key, std::vector<IColumn *> & key_columns, const Sizes & /*key_sizes*/)
{
static_assert(sizeof(FieldType) <= sizeof(Key));
const auto * key_holder = reinterpret_cast<const char *>(&key);
auto * column = static_cast<ColumnVectorHelper *>(key_columns[0]);
column->insertRawData<sizeof(FieldType)>(key_holder);
if constexpr (sizeof(FieldType) < sizeof(Key) && std::endian::native == std::endian::big)
column->insertRawData<sizeof(FieldType)>(key_holder + (sizeof(Key) - sizeof(FieldType)));
else
column->insertRawData<sizeof(FieldType)>(key_holder);
}
};
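A minimal sketch (Python, illustrative only, not ClickHouse code) of what the big-endian branch above corrects: when a narrow field type is packed into a wider hash-table key, its meaningful bytes start at offset 0 on little-endian machines but at the end of the key on big-endian ones, so the raw-data pointer has to be shifted by sizeof(Key) - sizeof(FieldType).

import struct

# Locate the field's bytes inside a wider 8-byte key for either byte order.
def field_bytes(key_value: int, field_size: int, big_endian: bool) -> bytes:
    key_bytes = struct.pack(">Q" if big_endian else "<Q", key_value)
    offset = len(key_bytes) - field_size if big_endian else 0
    return key_bytes[offset:offset + field_size]

# The same 4-byte value is recovered in both layouts only with the offset applied.
assert field_bytes(0x12345678, 4, big_endian=False) == struct.pack("<I", 0x12345678)
assert field_bytes(0x12345678, 4, big_endian=True) == struct.pack(">I", 0x12345678)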

View File

@ -94,6 +94,8 @@ struct BloomFilterHash
else if (which.isFloat32()) return build_hash_column(getNumberTypeHash<Float64, Float64>(field));
else if (which.isFloat64()) return build_hash_column(getNumberTypeHash<Float64, Float64>(field));
else if (which.isUUID()) return build_hash_column(getNumberTypeHash<UUID, UUID>(field));
else if (which.isIPv4()) return build_hash_column(getNumberTypeHash<IPv4, IPv4>(field));
else if (which.isIPv6()) return build_hash_column(getNumberTypeHash<IPv6, IPv6>(field));
else if (which.isString()) return build_hash_column(getStringTypeHash(field));
else if (which.isFixedString()) return build_hash_column(getFixedStringTypeHash(field, data_type));
else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type {} of bloom filter index.", data_type->getName());
@ -156,6 +158,8 @@ struct BloomFilterHash
else if (which.isFloat32()) getNumberTypeHash<Float32, is_first>(column, vec, pos);
else if (which.isFloat64()) getNumberTypeHash<Float64, is_first>(column, vec, pos);
else if (which.isUUID()) getNumberTypeHash<UUID, is_first>(column, vec, pos);
else if (which.isIPv4()) getNumberTypeHash<IPv4, is_first>(column, vec, pos);
else if (which.isIPv6()) getNumberTypeHash<IPv6, is_first>(column, vec, pos);
else if (which.isString()) getStringTypeHash<is_first>(column, vec, pos);
else if (which.isFixedString()) getStringTypeHash<is_first>(column, vec, pos);
else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type {} of bloom filter index.", data_type->getName());

View File

@ -313,14 +313,6 @@ Pipe && QueryCache::Reader::getPipe()
return std::move(pipe);
}
QueryCache::QueryCache(size_t max_cache_size_in_bytes_, size_t max_cache_entries_, size_t max_cache_entry_size_in_bytes_, size_t max_cache_entry_size_in_rows_)
: max_cache_size_in_bytes(max_cache_size_in_bytes_)
, max_cache_entries(max_cache_entries_)
, max_cache_entry_size_in_bytes(max_cache_entry_size_in_bytes_)
, max_cache_entry_size_in_rows(max_cache_entry_size_in_rows_)
{
}
QueryCache::Reader QueryCache::createReader(const Key & key)
{
std::lock_guard lock(mutex);
@ -343,14 +335,22 @@ void QueryCache::reset()
size_t QueryCache::recordQueryRun(const Key & key)
{
static constexpr size_t TIMES_EXECUTED_MAX_SIZE = 10'000;
std::lock_guard times_executed_lock(mutex);
std::lock_guard lock(mutex);
size_t times = ++times_executed[key];
// Regularly drop times_executed to avoid DOS-by-unlimited-growth.
static constexpr size_t TIMES_EXECUTED_MAX_SIZE = 10'000;
if (times_executed.size() > TIMES_EXECUTED_MAX_SIZE)
times_executed.clear();
return times;
}
void QueryCache::updateConfiguration(const Poco::Util::AbstractConfiguration & config)
{
std::lock_guard lock(mutex);
max_cache_size_in_bytes = config.getUInt64("query_cache.size", 1_GiB);
max_cache_entries = config.getUInt64("query_cache.max_entries", 1024);
max_cache_entry_size_in_bytes = config.getUInt64("query_cache.max_entry_size", 1_MiB);
max_cache_entry_size_in_rows = config.getUInt64("query_cache.max_entry_rows", 30'000'000);
}
}
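A minimal sketch (Python, hypothetical names) of the cap-and-clear counter pattern used for times_executed above: count how often each query key ran, but wipe the whole map once it exceeds a fixed size, so a stream of distinct queries cannot grow it without bound.

import threading
from collections import defaultdict

class RunCounter:
    TIMES_EXECUTED_MAX_SIZE = 10_000

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._times = defaultdict(int)

    def record(self, key: str) -> int:
        # Mirrors recordQueryRun: increment under the lock, then apply the size cap.
        with self._lock:
            self._times[key] += 1
            times = self._times[key]
            if len(self._times) > self.TIMES_EXECUTED_MAX_SIZE:
                self._times.clear()  # coarse protection against unbounded growth, not exact accounting
            return times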

View File

@ -2,6 +2,7 @@
#include <Core/Block.h>
#include <Parsers/IAST_fwd.h>
#include <Poco/Util/LayeredConfiguration.h>
#include <Processors/Chunk.h>
#include <QueryPipeline/Pipe.h>
@ -132,7 +133,7 @@ public:
friend class QueryCache; /// for createReader()
};
QueryCache(size_t max_cache_size_in_bytes_, size_t max_cache_entries_, size_t max_cache_entry_size_in_bytes_, size_t max_cache_entry_size_in_rows_);
void updateConfiguration(const Poco::Util::AbstractConfiguration & config);
Reader createReader(const Key & key);
Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime);
@ -154,11 +155,13 @@ private:
Cache cache TSA_GUARDED_BY(mutex);
TimesExecuted times_executed TSA_GUARDED_BY(mutex);
size_t cache_size_in_bytes TSA_GUARDED_BY(mutex) = 0; /// updated in each cache insert/delete
const size_t max_cache_size_in_bytes;
const size_t max_cache_entries;
const size_t max_cache_entry_size_in_bytes;
const size_t max_cache_entry_size_in_rows;
/// Cache configuration
size_t max_cache_size_in_bytes TSA_GUARDED_BY(mutex) = 0;
size_t max_cache_entries TSA_GUARDED_BY(mutex) = 0;
size_t max_cache_entry_size_in_bytes TSA_GUARDED_BY(mutex) = 0;
size_t max_cache_entry_size_in_rows TSA_GUARDED_BY(mutex) = 0;
size_t cache_size_in_bytes TSA_GUARDED_BY(mutex) = 0; /// Updated in each cache insert/delete
friend class StorageSystemQueryCache;
};

View File

@ -2041,14 +2041,22 @@ void Context::dropIndexMarkCache() const
shared->index_mark_cache->reset();
}
void Context::setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_records)
void Context::setQueryCache(const Poco::Util::AbstractConfiguration & config)
{
auto lock = getLock();
if (shared->query_cache)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache has been already created.");
shared->query_cache = std::make_shared<QueryCache>(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_records);
shared->query_cache = std::make_shared<QueryCache>();
shared->query_cache->updateConfiguration(config);
}
void Context::updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config)
{
auto lock = getLock();
if (shared->query_cache)
shared->query_cache->updateConfiguration(config);
}
QueryCachePtr Context::getQueryCache() const

View File

@ -872,7 +872,8 @@ public:
void dropMMappedFileCache() const;
/// Create a cache of query results for statements which run repeatedly.
void setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_records);
void setQueryCache(const Poco::Util::AbstractConfiguration & config);
void updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config);
std::shared_ptr<QueryCache> getQueryCache() const;
void dropQueryCache() const;

View File

@ -1801,11 +1801,16 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool proje
getActionsDAG(add_aliases, project_result), ExpressionActionsSettings::fromContext(getContext(), compile_expressions));
}
ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAndName & constant_inputs)
ActionsDAGPtr ExpressionAnalyzer::getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs)
{
auto actions = std::make_shared<ActionsDAG>(constant_inputs);
getRootActions(query, true /* no_makeset_for_subqueries */, actions, true /* only_consts */);
return actions;
}
ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAndName & constant_inputs)
{
auto actions = getConstActionsDAG(constant_inputs);
return std::make_shared<ExpressionActions>(actions, ExpressionActionsSettings::fromContext(getContext()));
}

View File

@ -119,8 +119,9 @@ public:
ActionsDAGPtr getActionsDAG(bool add_aliases, bool project_result = true);
ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true, CompileExpressions compile_expressions = CompileExpressions::no);
/// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants.
/// Get actions to evaluate a constant expression. The function adds constants and applies functions that depend only on constants.
/// Does not execute subqueries.
ActionsDAGPtr getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs = {});
ExpressionActionsPtr getConstActions(const ColumnsWithTypeAndName & constant_inputs = {});
/** Sets that require a subquery to be created.

View File

@ -70,7 +70,7 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
if (context->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && context->getSettingsRef().normalize_function_names)
FunctionNameNormalizer().visit(ast.get());
String name = ast->getColumnName();
String result_name = ast->getColumnName();
auto syntax_result = TreeRewriter(context).analyze(ast, source_columns);
/// AST potentially could be transformed to literal during TreeRewriter analyze.
@ -78,33 +78,37 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
if (ASTLiteral * literal = ast->as<ASTLiteral>())
return getFieldAndDataTypeFromLiteral(literal);
ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions();
auto actions = ExpressionAnalyzer(ast, syntax_result, context).getConstActionsDAG();
/// There must be at least one column in the block so that it knows the number of rows.
Block block_with_constants{{ ColumnConst::create(ColumnUInt8::create(1, 0), 1), std::make_shared<DataTypeUInt8>(), "_dummy" }};
ColumnPtr result_column;
DataTypePtr result_type;
for (const auto & action_node : actions->getOutputs())
{
if ((action_node->result_name == result_name) && action_node->column)
{
result_column = action_node->column;
result_type = action_node->result_type;
break;
}
}
expr_for_constant_folding->execute(block_with_constants);
if (!result_column)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Element of set in IN, VALUES or LIMIT or aggregate function parameter "
"is not a constant expression (result column not found): {}", result_name);
if (!block_with_constants || block_with_constants.rows() == 0)
if (result_column->empty())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Logical error: empty block after evaluation "
"Logical error: empty result column after evaluation "
"of constant expression for IN, VALUES or LIMIT or aggregate function parameter");
if (!block_with_constants.has(name))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Element of set in IN, VALUES or LIMIT or aggregate function parameter "
"is not a constant expression (result column not found): {}", name);
const ColumnWithTypeAndName & result = block_with_constants.getByName(name);
const IColumn & result_column = *result.column;
/// Expressions like rand() or now() are not constant
if (!isColumnConst(result_column))
if (!isColumnConst(*result_column))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Element of set in IN, VALUES or LIMIT or aggregate function parameter "
"is not a constant expression (result column is not const): {}", name);
"is not a constant expression (result column is not const): {}", result_name);
return std::make_pair(result_column[0], result.type);
return std::make_pair((*result_column)[0], result_type);
}
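A minimal sketch (Python, illustrative names) of the lookup that replaces executing the expression on a dummy block above: scan the DAG outputs for the node whose result_name matches and that already carries a constant-folded column.

from dataclasses import dataclass
from typing import Optional, Sequence

@dataclass
class OutputNode:
    result_name: str
    column: Optional[list]  # set only if the node was constant-folded

def find_constant_output(outputs: Sequence[OutputNode], result_name: str) -> list:
    for node in outputs:
        if node.result_name == result_name and node.column is not None:
            return node.column
    raise ValueError(f"not a constant expression (result column not found): {result_name}")

# Usage: find_constant_output([OutputNode("1 + 1", [2])], "1 + 1") returns [2].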

View File

@ -74,6 +74,11 @@ void ASTStorage::formatImpl(const FormatSettings & s, FormatState & state, Forma
}
}
bool ASTStorage::isExtendedStorageDefinition() const
{
return partition_by || primary_key || order_by || sample_by || settings;
}
class ASTColumnsElement : public IAST
{

View File

@ -30,6 +30,8 @@ public:
ASTPtr clone() const override;
void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
bool isExtendedStorageDefinition() const;
};

View File

@ -8,7 +8,7 @@ namespace DB
{
JSONColumnsBlockOutputFormat::JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool validate_utf8, size_t indent_)
: JSONColumnsBlockOutputFormatBase(out_, header_, format_settings_, validate_utf8), indent(indent_)
: JSONColumnsBlockOutputFormatBase(out_, header_, format_settings_, validate_utf8), indent(indent_), header(header_)
{
names = JSONUtils::makeNamesValidJSONStrings(header_.getNames(), format_settings, validate_utf8);
}
@ -25,6 +25,18 @@ void JSONColumnsBlockOutputFormat::writeColumnStart(size_t column_index)
void JSONColumnsBlockOutputFormat::writeChunkEnd()
{
/// Write empty chunk
if (!written_rows)
{
const auto & columns = header.getColumns();
for (size_t i = 0; i != columns.size(); ++i)
{
writeColumnStart(i);
writeColumn(*columns[i], *serializations[i]);
writeColumnEnd(i == columns.size() - 1);
}
}
JSONUtils::writeObjectEnd(*ostr, indent);
writeChar('\n', *ostr);
}
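A minimal sketch (Python, simplified) of why an empty chunk still needs the loop above: the JSONColumns family keys its output by column name, so a result with zero rows must still list every header column with an empty array to stay a valid, schema-complete object.

import json

def json_columns(header_names, rows):
    # One array per column; row values are appended column by column.
    columns = {name: [] for name in header_names}
    for row in rows:
        for name, value in zip(header_names, row):
            columns[name].append(value)
    return json.dumps(columns)

print(json_columns(["id", "name"], []))  # {"id": [], "name": []}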

View File

@ -27,6 +27,8 @@ protected:
Names names;
size_t indent;
Block header;
};
}

View File

@ -42,6 +42,7 @@ void JSONColumnsBlockOutputFormatBase::writeChunk(Chunk & chunk)
writeColumn(*columns[i], *serializations[i]);
writeColumnEnd(i == columns.size() - 1);
}
written_rows += chunk.getNumRows();
writeChunkEnd();
}

View File

@ -36,6 +36,8 @@ protected:
const Serializations serializations;
Chunk mono_chunk;
size_t written_rows = 0;
};
}

View File

@ -124,6 +124,8 @@ size_t IntersectOrExceptTransform::buildFilter(
void IntersectOrExceptTransform::accumulate(Chunk chunk)
{
convertToFullIfSparse(chunk);
auto num_rows = chunk.getNumRows();
auto columns = chunk.detachColumns();
@ -160,6 +162,8 @@ void IntersectOrExceptTransform::accumulate(Chunk chunk)
void IntersectOrExceptTransform::filter(Chunk & chunk)
{
convertToFullIfSparse(chunk);
auto num_rows = chunk.getNumRows();
auto columns = chunk.detachColumns();

View File

@ -700,12 +700,14 @@ struct StorageDistributedDirectoryMonitor::BatchHeader
struct StorageDistributedDirectoryMonitor::Batch
{
/// File indexes for this batch.
std::vector<UInt64> file_indices;
size_t total_rows = 0;
size_t total_bytes = 0;
bool recovered = false;
StorageDistributedDirectoryMonitor & parent;
/// Information about all available indexes (not only for the current batch).
const std::map<UInt64, String> & file_index_to_path;
bool split_batch_on_failure = true;
@ -795,17 +797,22 @@ struct StorageDistributedDirectoryMonitor::Batch
else
{
std::vector<std::string> files;
for (const auto && file_info : file_index_to_path | boost::adaptors::indexed())
for (auto file_index_info : file_indices | boost::adaptors::indexed())
{
if (file_info.index() > 8)
if (file_index_info.index() > 8)
{
files.push_back("...");
break;
}
files.push_back(file_info.value().second);
auto file_index = file_index_info.value();
auto file_path = file_index_to_path.find(file_index);
if (file_path != file_index_to_path.end())
files.push_back(file_path->second);
else
files.push_back(fmt::format("#{}.bin (deleted)", file_index));
}
e.addMessage(fmt::format("While sending batch, nums: {}, files: {}", file_index_to_path.size(), fmt::join(files, "\n")));
e.addMessage(fmt::format("While sending batch, size: {}, files: {}", file_indices.size(), fmt::join(files, "\n")));
throw;
}
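A minimal sketch (Python, hypothetical helper) of the message-building change above: iterate the batch's own file indices rather than every known file, cap the listing, and mark indices whose path is no longer known as deleted.

def describe_batch(file_indices, file_index_to_path, limit=8):
    files = []
    for i, file_index in enumerate(file_indices):
        if i > limit:
            files.append("...")
            break
        files.append(file_index_to_path.get(file_index, f"#{file_index}.bin (deleted)"))
    return f"While sending batch, size: {len(file_indices)}, files: " + "\n".join(files)

print(describe_batch([1, 7], {1: "/distributed/1.bin"}))
# While sending batch, size: 2, files: /distributed/1.bin
# #7.bin (deleted)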

View File

@ -706,8 +706,12 @@ Block KeyCondition::getBlockWithConstants(
if (syntax_analyzer_result)
{
const auto expr_for_constant_folding = ExpressionAnalyzer(query, syntax_analyzer_result, context).getConstActions();
expr_for_constant_folding->execute(result);
auto actions = ExpressionAnalyzer(query, syntax_analyzer_result, context).getConstActionsDAG();
for (const auto & action_node : actions->getOutputs())
{
if (action_node->column)
result.insert(ColumnWithTypeAndName{action_node->column, action_node->result_type, action_node->result_name});
}
}
return result;

View File

@ -379,11 +379,6 @@ namespace
bool columnExists(const String & name) const { return block.has(name); }
void insertStringColumn(const ColumnPtr & column, const String & name)
{
block.insert({column, std::make_shared<DataTypeString>(), name});
}
void insertUInt8Column(const ColumnPtr & column, const String & name)
{
block.insert({column, std::make_shared<DataTypeUInt8>(), name});
@ -399,6 +394,11 @@ namespace
block.insert({column, std::make_shared<DataTypeUUID>(), name});
}
void insertLowCardinalityColumn(const ColumnPtr & column, const String & name)
{
block.insert({column, std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), name});
}
void insertPartitionValueColumn(
size_t rows, const Row & partition_value, const DataTypePtr & partition_value_type, const String & name)
{
@ -483,11 +483,13 @@ static void injectPartConstVirtualColumns(
{
ColumnPtr column;
if (rows)
column = DataTypeString().createColumnConst(rows, part->name)->convertToFullColumnIfConst();
column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}
.createColumnConst(rows, part->name)
->convertToFullColumnIfConst();
else
column = DataTypeString().createColumn();
column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn();
inserter.insertStringColumn(column, virtual_column_name);
inserter.insertLowCardinalityColumn(column, virtual_column_name);
}
else if (virtual_column_name == "_part_index")
{
@ -513,11 +515,13 @@ static void injectPartConstVirtualColumns(
{
ColumnPtr column;
if (rows)
column = DataTypeString().createColumnConst(rows, part->info.partition_id)->convertToFullColumnIfConst();
column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}
.createColumnConst(rows, part->info.partition_id)
->convertToFullColumnIfConst();
else
column = DataTypeString().createColumn();
column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn();
inserter.insertStringColumn(column, virtual_column_name);
inserter.insertLowCardinalityColumn(column, virtual_column_name);
}
else if (virtual_column_name == "_partition_value")
{

View File

@ -840,8 +840,14 @@ Block MergeTreeData::getSampleBlockWithVirtualColumns() const
{
DataTypePtr partition_value_type = getPartitionValueType();
return {
ColumnWithTypeAndName(ColumnString::create(), std::make_shared<DataTypeString>(), "_part"),
ColumnWithTypeAndName(ColumnString::create(), std::make_shared<DataTypeString>(), "_partition_id"),
ColumnWithTypeAndName(
DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
"_part"),
ColumnWithTypeAndName(
DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
"_partition_id"),
ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared<DataTypeUUID>(), "_part_uuid"),
ColumnWithTypeAndName(partition_value_type->createColumn(), partition_value_type, "_partition_value")};
}
@ -1889,7 +1895,9 @@ size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lif
{
if (temporary_parts.contains(basename))
{
LOG_WARNING(log, "{} is in use (by merge/mutation/INSERT) (consider increasing temporary_directories_lifetime setting)", full_path);
/// Actually we don't rely on temporary_directories_lifetime when removing old temporary directories,
/// it's just an extra level of protection in case we have a bug.
LOG_INFO(log, "{} is in use (by merge/mutation/INSERT) (consider increasing temporary_directories_lifetime setting)", full_path);
continue;
}
else
@ -7576,7 +7584,19 @@ MergeTreeData::WriteAheadLogPtr MergeTreeData::getWriteAheadLog()
if (!write_ahead_log)
{
auto reservation = reserveSpace(getSettings()->write_ahead_log_max_bytes);
write_ahead_log = std::make_shared<MergeTreeWriteAheadLog>(*this, reservation->getDisk());
for (const auto & disk: reservation->getDisks())
{
if (!disk->isRemote())
{
write_ahead_log = std::make_shared<MergeTreeWriteAheadLog>(*this, disk);
break;
}
}
if (!write_ahead_log)
throw Exception(
ErrorCodes::NOT_IMPLEMENTED,
"Can't store write ahead log in remote disk. It makes no sense.");
}
return write_ahead_log;
@ -7585,10 +7605,10 @@ MergeTreeData::WriteAheadLogPtr MergeTreeData::getWriteAheadLog()
NamesAndTypesList MergeTreeData::getVirtuals() const
{
return NamesAndTypesList{
NameAndTypePair("_part", std::make_shared<DataTypeString>()),
NameAndTypePair("_part", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
NameAndTypePair("_part_index", std::make_shared<DataTypeUInt64>()),
NameAndTypePair("_part_uuid", std::make_shared<DataTypeUUID>()),
NameAndTypePair("_partition_id", std::make_shared<DataTypeString>()),
NameAndTypePair("_partition_id", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
NameAndTypePair("_partition_value", getPartitionValueType()),
NameAndTypePair("_sample_factor", std::make_shared<DataTypeFloat64>()),
NameAndTypePair("_part_offset", std::make_shared<DataTypeUInt64>()),

View File

@ -88,7 +88,8 @@ static void assertIndexColumnsType(const Block & header)
WhichDataType which(actual_type);
if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && !which.isFloat() &&
!which.isDate() && !which.isDateTime() && !which.isDateTime64() && !which.isEnum() && !which.isUUID())
!which.isDate() && !which.isDateTime() && !which.isDateTime64() && !which.isEnum() && !which.isUUID() &&
!which.isIPv4() && !which.isIPv6())
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type {} of bloom filter index.", type->getName());
}
}

View File

@ -147,9 +147,9 @@ static StoragePtr create(const StorageFactory::Arguments & args)
* - Additional MergeTreeSettings in the SETTINGS clause;
*/
bool is_extended_storage_def = args.storage_def->partition_by || args.storage_def->primary_key || args.storage_def->order_by
|| args.storage_def->sample_by || (args.query.columns_list->indices && !args.query.columns_list->indices->children.empty())
|| (args.query.columns_list->projections && !args.query.columns_list->projections->children.empty()) || args.storage_def->settings;
bool is_extended_storage_def = args.storage_def->isExtendedStorageDefinition()
|| (args.query.columns_list->indices && !args.query.columns_list->indices->children.empty())
|| (args.query.columns_list->projections && !args.query.columns_list->projections->children.empty());
String name_part = args.engine_name.substr(0, args.engine_name.size() - strlen("MergeTree"));

View File

@ -268,11 +268,11 @@ NamesAndTypesList StorageDistributed::getVirtuals() const
/// NOTE This is weird. Most of these virtual columns are part of MergeTree
/// tables info. But Distributed is a general-purpose engine.
return NamesAndTypesList{
NameAndTypePair("_table", std::make_shared<DataTypeString>()),
NameAndTypePair("_part", std::make_shared<DataTypeString>()),
NameAndTypePair("_table", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
NameAndTypePair("_part", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
NameAndTypePair("_part_index", std::make_shared<DataTypeUInt64>()),
NameAndTypePair("_part_uuid", std::make_shared<DataTypeUUID>()),
NameAndTypePair("_partition_id", std::make_shared<DataTypeString>()),
NameAndTypePair("_partition_id", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
NameAndTypePair("_sample_factor", std::make_shared<DataTypeFloat64>()),
NameAndTypePair("_part_offset", std::make_shared<DataTypeUInt64>()),
NameAndTypePair("_row_exists", std::make_shared<DataTypeUInt8>()),

View File

@ -664,7 +664,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources(
{
ColumnWithTypeAndName column;
column.name = "_database";
column.type = std::make_shared<DataTypeString>();
column.type = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
column.column = column.type->createColumnConst(0, Field(database_name));
auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
@ -682,7 +682,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources(
{
ColumnWithTypeAndName column;
column.name = "_table";
column.type = std::make_shared<DataTypeString>();
column.type = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
column.column = column.type->createColumnConst(0, Field(table_name));
auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
@ -980,7 +980,9 @@ void registerStorageMerge(StorageFactory & factory)
NamesAndTypesList StorageMerge::getVirtuals() const
{
NamesAndTypesList virtuals{{"_database", std::make_shared<DataTypeString>()}, {"_table", std::make_shared<DataTypeString>()}};
NamesAndTypesList virtuals{
{"_database", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"_table", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
auto first_table = getFirstTable([](auto && table) { return table; });
if (first_table)

View File

@ -1601,37 +1601,39 @@ void StorageMergeTree::renameAndCommitEmptyParts(MutableDataPartsVector & new_pa
void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr query_context, TableExclusiveLockHolder &)
{
/// Asks to complete merges and does not allow them to start.
/// This protects against "revival" of data for a removed partition after completion of merge.
auto merge_blocker = stopMergesAndWait();
waitForOutdatedPartsToBeLoaded();
Stopwatch watch;
auto txn = query_context->getCurrentTransaction();
MergeTreeData::Transaction transaction(*this, txn.get());
{
auto operation_data_parts_lock = lockOperationsWithParts();
/// Asks to complete merges and does not allow them to start.
/// This protects against "revival" of data for a removed partition after completion of merge.
auto merge_blocker = stopMergesAndWait();
waitForOutdatedPartsToBeLoaded();
auto parts = getVisibleDataPartsVector(query_context);
Stopwatch watch;
auto future_parts = initCoverageWithNewEmptyParts(parts);
auto txn = query_context->getCurrentTransaction();
MergeTreeData::Transaction transaction(*this, txn.get());
{
auto operation_data_parts_lock = lockOperationsWithParts();
LOG_TEST(log, "Made {} empty parts in order to cover {} parts. Empty parts: {}, covered parts: {}. With txn {}",
future_parts.size(), parts.size(),
fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "),
transaction.getTID());
auto parts = getVisibleDataPartsVector(query_context);
captureTmpDirectoryHolders(*this, future_parts);
auto future_parts = initCoverageWithNewEmptyParts(parts);
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
renameAndCommitEmptyParts(new_data_parts, transaction);
LOG_TEST(log, "Made {} empty parts in order to cover {} parts. Empty parts: {}, covered parts: {}. With txn {}",
future_parts.size(), parts.size(),
fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "),
transaction.getTID());
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
captureTmpDirectoryHolders(*this, future_parts);
LOG_INFO(log, "Truncated table with {} parts by replacing them with new empty {} parts. With txn {}",
parts.size(), future_parts.size(),
transaction.getTID());
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
renameAndCommitEmptyParts(new_data_parts, transaction);
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
LOG_INFO(log, "Truncated table with {} parts by replacing them with new empty {} parts. With txn {}",
parts.size(), future_parts.size(),
transaction.getTID());
}
}
/// Old parts are needed to be destroyed before clearing them from filesystem.
@ -1642,48 +1644,50 @@ void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont
void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPtr query_context)
{
/// Asks to complete merges and does not allow them to start.
/// This protects against "revival" of data for a removed partition after completion of merge.
auto merge_blocker = stopMergesAndWait();
Stopwatch watch;
/// It's important to create it outside of lock scope because
/// otherwise it can lock parts in destructor and deadlock is possible.
auto txn = query_context->getCurrentTransaction();
MergeTreeData::Transaction transaction(*this, txn.get());
{
auto operation_data_parts_lock = lockOperationsWithParts();
/// Asks to complete merges and does not allow them to start.
/// This protects against "revival" of data for a removed partition after completion of merge.
auto merge_blocker = stopMergesAndWait();
auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active});
if (!part)
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} not found, won't try to drop it.", part_name);
Stopwatch watch;
if (detach)
/// It's important to create it outside of lock scope because
/// otherwise it can lock parts in destructor and deadlock is possible.
auto txn = query_context->getCurrentTransaction();
MergeTreeData::Transaction transaction(*this, txn.get());
{
auto metadata_snapshot = getInMemoryMetadataPtr();
LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
part->makeCloneInDetached("", metadata_snapshot);
}
auto operation_data_parts_lock = lockOperationsWithParts();
{
auto future_parts = initCoverageWithNewEmptyParts({part});
auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active});
if (!part)
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} not found, won't try to drop it.", part_name);
LOG_TEST(log, "Made {} empty parts in order to cover {} part. With txn {}",
fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames({part}), ", "),
transaction.getTID());
if (detach)
{
auto metadata_snapshot = getInMemoryMetadataPtr();
LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
part->makeCloneInDetached("", metadata_snapshot);
}
captureTmpDirectoryHolders(*this, future_parts);
{
auto future_parts = initCoverageWithNewEmptyParts({part});
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
renameAndCommitEmptyParts(new_data_parts, transaction);
LOG_TEST(log, "Made {} empty parts in order to cover {} part. With txn {}",
fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames({part}), ", "),
transaction.getTID());
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
captureTmpDirectoryHolders(*this, future_parts);
const auto * op = detach ? "Detached" : "Dropped";
LOG_INFO(log, "{} {} part by replacing it with new empty {} part. With txn {}",
op, part->name, future_parts[0].part_name,
transaction.getTID());
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
renameAndCommitEmptyParts(new_data_parts, transaction);
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
const auto * op = detach ? "Detached" : "Dropped";
LOG_INFO(log, "{} {} part by replacing it with new empty {} part. With txn {}",
op, part->name, future_parts[0].part_name,
transaction.getTID());
}
}
}
@ -1695,58 +1699,60 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt
void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, ContextPtr query_context)
{
const auto * partition_ast = partition->as<ASTPartition>();
/// Asks to complete merges and does not allow them to start.
/// This protects against "revival" of data for a removed partition after completion of merge.
auto merge_blocker = stopMergesAndWait();
Stopwatch watch;
/// It's important to create it outside of lock scope because
/// otherwise it can lock parts in destructor and deadlock is possible.
auto txn = query_context->getCurrentTransaction();
MergeTreeData::Transaction transaction(*this, txn.get());
{
auto operation_data_parts_lock = lockOperationsWithParts();
const auto * partition_ast = partition->as<ASTPartition>();
DataPartsVector parts;
/// Asks to complete merges and does not allow them to start.
/// This protects against "revival" of data for a removed partition after completion of merge.
auto merge_blocker = stopMergesAndWait();
Stopwatch watch;
/// It's important to create it outside of lock scope because
/// otherwise it can lock parts in destructor and deadlock is possible.
auto txn = query_context->getCurrentTransaction();
MergeTreeData::Transaction transaction(*this, txn.get());
{
if (partition_ast && partition_ast->all)
parts = getVisibleDataPartsVector(query_context);
else
auto operation_data_parts_lock = lockOperationsWithParts();
DataPartsVector parts;
{
String partition_id = getPartitionIDFromQuery(partition, query_context);
parts = getVisibleDataPartsVectorInPartition(query_context, partition_id);
if (partition_ast && partition_ast->all)
parts = getVisibleDataPartsVector(query_context);
else
{
String partition_id = getPartitionIDFromQuery(partition, query_context);
parts = getVisibleDataPartsVectorInPartition(query_context, partition_id);
}
}
if (detach)
for (const auto & part : parts)
{
auto metadata_snapshot = getInMemoryMetadataPtr();
LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
part->makeCloneInDetached("", metadata_snapshot);
}
auto future_parts = initCoverageWithNewEmptyParts(parts);
LOG_TEST(log, "Made {} empty parts in order to cover {} parts. Empty parts: {}, covered parts: {}. With txn {}",
future_parts.size(), parts.size(),
fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "),
transaction.getTID());
captureTmpDirectoryHolders(*this, future_parts);
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
renameAndCommitEmptyParts(new_data_parts, transaction);
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
const auto * op = detach ? "Detached" : "Dropped";
LOG_INFO(log, "{} partition with {} parts by replacing them with new empty {} parts. With txn {}",
op, parts.size(), future_parts.size(),
transaction.getTID());
}
if (detach)
for (const auto & part : parts)
{
auto metadata_snapshot = getInMemoryMetadataPtr();
LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
part->makeCloneInDetached("", metadata_snapshot);
}
auto future_parts = initCoverageWithNewEmptyParts(parts);
LOG_TEST(log, "Made {} empty parts in order to cover {} parts. Empty parts: {}, covered parts: {}. With txn {}",
future_parts.size(), parts.size(),
fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "),
transaction.getTID());
captureTmpDirectoryHolders(*this, future_parts);
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
renameAndCommitEmptyParts(new_data_parts, transaction);
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
const auto * op = detach ? "Detached" : "Dropped";
LOG_INFO(log, "{} partition with {} parts by replacing them with new empty {} parts. With txn {}",
op, parts.size(), future_parts.size(),
transaction.getTID());
}
/// Old parts are needed to be destroyed before clearing them from filesystem.

View File

@ -107,6 +107,7 @@ namespace ErrorCodes
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_COMPILE_REGEXP;
extern const int FILE_DOESNT_EXIST;
}
class IOutputFormat;
@ -260,6 +261,9 @@ private:
outcome_future = listObjectsAsync();
}
if (request_settings.throw_on_zero_files_match && result_batch.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files using prefix {}", request.GetPrefix());
KeysWithInfo temp_buffer;
temp_buffer.reserve(result_batch.size());

View File

@ -167,6 +167,7 @@ S3Settings::RequestSettings::RequestSettings(const NamedCollection & collection)
max_connections = collection.getOrDefault<UInt64>("max_connections", max_connections);
list_object_keys_size = collection.getOrDefault<UInt64>("list_object_keys_size", list_object_keys_size);
allow_head_object_request = collection.getOrDefault<bool>("allow_head_object_request", allow_head_object_request);
throw_on_zero_files_match = collection.getOrDefault<bool>("throw_on_zero_files_match", throw_on_zero_files_match);
}
S3Settings::RequestSettings::RequestSettings(
@ -182,6 +183,7 @@ S3Settings::RequestSettings::RequestSettings(
check_objects_after_upload = config.getBool(key + "check_objects_after_upload", settings.s3_check_objects_after_upload);
list_object_keys_size = config.getUInt64(key + "list_object_keys_size", settings.s3_list_object_keys_size);
allow_head_object_request = config.getBool(key + "allow_head_object_request", allow_head_object_request);
throw_on_zero_files_match = config.getBool(key + "throw_on_zero_files_match", settings.s3_throw_on_zero_files_match);
/// NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload,
/// which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used.
@ -231,6 +233,9 @@ void S3Settings::RequestSettings::updateFromSettingsImpl(const Settings & settin
if ((!if_changed || settings.s3_max_put_rps.changed || settings.s3_max_put_burst.changed) && settings.s3_max_put_rps)
put_request_throttler = std::make_shared<Throttler>(
settings.s3_max_put_rps, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps);
if (!if_changed || settings.s3_throw_on_zero_files_match)
throw_on_zero_files_match = settings.s3_throw_on_zero_files_match;
}
void S3Settings::RequestSettings::updateFromSettings(const Settings & settings)

View File

@ -77,6 +77,8 @@ struct S3Settings
/// See https://github.com/aws/aws-sdk-cpp/issues/1558 and also the function S3ErrorMarshaller::ExtractRegion() for more information.
bool allow_head_object_request = true;
bool throw_on_zero_files_match = false;
const PartUploadSettings & getUploadSettings() const { return upload_settings; }
RequestSettings() = default;

View File

@ -51,6 +51,7 @@ const char * auto_config_build[]
"USE_ROCKSDB", "@USE_ROCKSDB@",
"USE_NURAFT", "@USE_NURAFT@",
"USE_NLP", "@USE_NLP@",
"USE_LIBURING", "@USE_LIBURING@",
"USE_SQLITE", "@USE_SQLITE@",
"USE_LIBPQXX", "@USE_LIBPQXX@",
"USE_AZURE_BLOB_STORAGE", "@USE_AZURE_BLOB_STORAGE@",

View File

@ -120,7 +120,7 @@ void StorageSystemProcesses::fillData(MutableColumns & res_columns, ContextPtr c
res_columns[i++]->insert(process.client_info.quota_key);
res_columns[i++]->insert(process.client_info.distributed_depth);
res_columns[i++]->insert(static_cast<double>(process.elapsed_microseconds) / 100000.0);
res_columns[i++]->insert(static_cast<double>(process.elapsed_microseconds) / 1'000'000.0);
res_columns[i++]->insert(process.is_cancelled);
res_columns[i++]->insert(process.is_all_data_sent);
res_columns[i++]->insert(process.read_rows);
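A quick arithmetic check of the fix above: elapsed_microseconds converts to seconds by dividing by 1'000'000; the old divisor of 100'000 overstated the elapsed time tenfold.

elapsed_microseconds = 2_500_000
print(elapsed_microseconds / 100_000)    # 25.0  -> wrong, 10x too large
print(elapsed_microseconds / 1_000_000)  # 2.5   -> seconds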

View File

@ -129,6 +129,9 @@ if (TARGET ch_contrib::parquet)
set(USE_ARROW 1)
set(USE_ORC 1)
endif()
if (TARGET ch_contrib::liburing)
set(USE_LIBURING 1)
endif ()
if (TARGET ch_contrib::protobuf)
set(USE_PROTOBUF 1)
endif()

View File

@ -5,7 +5,8 @@ import logging
import os
import sys
import time
from typing import Any, List, Optional
from pathlib import Path
from typing import Any, Callable, List, Optional
import requests # type: ignore
@ -56,21 +57,29 @@ def read_build_urls(build_name: str, reports_path: str) -> List[str]:
return []
def download_build_with_progress(url, path):
def download_build_with_progress(url: str, path: Path) -> None:
logging.info("Downloading from %s to temp path %s", url, path)
for i in range(DOWNLOAD_RETRIES_COUNT):
try:
response = get_with_retries(url, retries=1, stream=True)
total_length = int(response.headers.get("content-length", 0))
if path.is_file() and total_length and path.stat().st_size == total_length:
logging.info(
"The file %s already exists and have a proper size %s",
path,
total_length,
)
return
with open(path, "wb") as f:
response = get_with_retries(url, retries=1, stream=True)
total_length = response.headers.get("content-length")
if total_length is None or int(total_length) == 0:
if total_length == 0:
logging.info(
"No content-length, will download file without progress"
)
f.write(response.content)
else:
dl = 0
total_length = int(total_length)
logging.info("Content length is %ld bytes", total_length)
for data in response.iter_content(chunk_size=4096):
dl += len(data)
@ -99,12 +108,14 @@ def download_build_with_progress(url, path):
logging.info("Downloading finished")
def download_builds(result_path, build_urls, filter_fn):
def download_builds(
result_path: str, build_urls: List[str], filter_fn: Callable[[str], bool]
) -> None:
for url in build_urls:
if filter_fn(url):
fname = os.path.basename(url.replace("%2B", "+").replace("%20", " "))
logging.info("Will download %s to %s", fname, result_path)
download_build_with_progress(url, os.path.join(result_path, fname))
download_build_with_progress(url, Path(result_path) / fname)
def download_builds_filter(
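A minimal sketch (Python; simplified, since the real code performs the check during the streamed GET while a HEAD request stands in here) of the resume check added above: skip the download when the file already exists and its size matches the server-reported Content-Length.

from pathlib import Path
import requests

def already_downloaded(url: str, path: Path) -> bool:
    response = requests.head(url, allow_redirects=True, timeout=30)
    total_length = int(response.headers.get("content-length", 0))
    return path.is_file() and total_length > 0 and path.stat().st_size == total_length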

View File

@ -182,6 +182,12 @@ CI_CONFIG = {
"tests_config": {
# required_build - build name for artifacts
# force_tests - force success status for tests
"Install packages (amd64)": {
"required_build": "package_release",
},
"Install packages (arm64)": {
"required_build": "package_aarch64",
},
"Stateful tests (asan)": {
"required_build": "package_asan",
},

View File

@ -6,6 +6,7 @@ This file is needed to avoid cicle import build_download_helper.py <=> env_helpe
import argparse
import logging
import os
from pathlib import Path
from build_download_helper import download_build_with_progress
from ci_config import CI_CONFIG, BuildConfig
@ -57,14 +58,15 @@ def parse_args() -> argparse.Namespace:
def main():
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
args = parse_args()
os.makedirs(TEMP_PATH, exist_ok=True)
temp_path = Path(TEMP_PATH)
temp_path.mkdir(parents=True, exist_ok=True)
for build in args.build_names:
# check if it's in CI_CONFIG
config = CI_CONFIG["build_config"][build] # type: BuildConfig
if args.rename:
path = os.path.join(TEMP_PATH, f"clickhouse-{config['static_binary_name']}")
path = temp_path / f"clickhouse-{config['static_binary_name']}"
else:
path = os.path.join(TEMP_PATH, "clickhouse")
path = temp_path / "clickhouse"
url = S3_ARTIFACT_DOWNLOAD_TEMPLATE.format(
pr_or_release=f"{args.version.major}.{args.version.minor}",

tests/ci/install_check.py (new file, 315 lines)
View File

@ -0,0 +1,315 @@
#!/usr/bin/env python3
import argparse
import atexit
import logging
import sys
import subprocess
from pathlib import Path
from typing import Dict
from github import Github
from build_download_helper import download_builds_filter
from clickhouse_helper import (
ClickHouseHelper,
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from commit_status_helper import post_commit_status, update_mergeable_check
from docker_pull_helper import get_image_with_version, DockerImage
from env_helper import CI, TEMP_PATH as TEMP, REPORTS_PATH
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
from report import TestResults, TestResult
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from stopwatch import Stopwatch
from tee_popen import TeePopen
from upload_result_helper import upload_results
RPM_IMAGE = "clickhouse/install-rpm-test"
DEB_IMAGE = "clickhouse/install-deb-test"
TEMP_PATH = Path(TEMP)
SUCCESS = "success"
FAILURE = "failure"
def prepare_test_scripts():
server_test = r"""#!/bin/bash
systemctl start clickhouse-server
clickhouse-client -q 'SELECT version()'"""
keeper_test = r"""#!/bin/bash
systemctl start clickhouse-keeper
for i in {1..20}; do
echo wait for clickhouse-keeper to be up
> /dev/tcp/127.0.0.1/9181 2>/dev/null && break || sleep 1
done
for i in {1..5}; do
echo wait for clickhouse-keeper to answer on mntr request
exec 13<>/dev/tcp/127.0.0.1/9181
echo mntr >&13
cat <&13 | grep zk_version && break || sleep 1
exec 13>&-
done
exec 13>&-"""
binary_test = r"""#!/bin/bash
chmod +x /packages/clickhouse
/packages/clickhouse install
clickhouse-server start --daemon
for i in {1..5}; do
clickhouse-client -q 'SELECT version()' && break || sleep 1
done
clickhouse-keeper start --daemon
for i in {1..20}; do
echo wait for clickhouse-keeper to be up
> /dev/tcp/127.0.0.1/9181 2>/dev/null && break || sleep 1
done
for i in {1..5}; do
echo wait for clickhouse-keeper to answer on mntr request
exec 13<>/dev/tcp/127.0.0.1/9181
echo mntr >&13
cat <&13 | grep zk_version && break || sleep 1
exec 13>&-
done
exec 13>&-"""
(TEMP_PATH / "server_test.sh").write_text(server_test, encoding="utf-8")
(TEMP_PATH / "keeper_test.sh").write_text(keeper_test, encoding="utf-8")
(TEMP_PATH / "binary_test.sh").write_text(binary_test, encoding="utf-8")
def test_install_deb(image: DockerImage) -> TestResults:
tests = {
"Install server deb": r"""#!/bin/bash -ex
apt-get install /packages/clickhouse-{server,client,common}*deb
bash -ex /packages/server_test.sh""",
"Install keeper deb": r"""#!/bin/bash -ex
apt-get install /packages/clickhouse-keeper*deb
bash -ex /packages/keeper_test.sh""",
"Install clickhouse binary in deb": r"bash -ex /packages/binary_test.sh",
}
return test_install(image, tests)
def test_install_rpm(image: DockerImage) -> TestResults:
# FIXME: I couldn't find why Type=notify is broken in centos:8
# systemd just ignores the watchdog completely
tests = {
"Install server rpm": r"""#!/bin/bash -ex
yum localinstall --disablerepo=* -y /packages/clickhouse-{server,client,common}*rpm
echo CLICKHOUSE_WATCHDOG_ENABLE=0 > /etc/default/clickhouse-server
bash -ex /packages/server_test.sh""",
"Install keeper rpm": r"""#!/bin/bash -ex
yum localinstall --disablerepo=* -y /packages/clickhouse-keeper*rpm
bash -ex /packages/keeper_test.sh""",
"Install clickhouse binary in rpm": r"bash -ex /packages/binary_test.sh",
}
return test_install(image, tests)
def test_install_tgz(image: DockerImage) -> TestResults:
# FIXME: I couldn't find why Type=notify is broken in centos:8
# systemd just ignores the watchdog completely
tests = {
f"Install server tgz in {image.name}": r"""#!/bin/bash -ex
[ -f /etc/debian_version ] && CONFIGURE=configure || CONFIGURE=
for pkg in /packages/clickhouse-{common,client,server}*tgz; do
package=${pkg%-*}
package=${package##*/}
tar xf "$pkg"
"/$package/install/doinst.sh" $CONFIGURE
done
[ -f /etc/yum.conf ] && echo CLICKHOUSE_WATCHDOG_ENABLE=0 > /etc/default/clickhouse-server
bash -ex /packages/server_test.sh""",
f"Install keeper tgz in {image.name}": r"""#!/bin/bash -ex
[ -f /etc/debian_version ] && CONFIGURE=configure || CONFIGURE=
for pkg in /packages/clickhouse-keeper*tgz; do
package=${pkg%-*}
package=${package##*/}
tar xf "$pkg"
"/$package/install/doinst.sh" $CONFIGURE
done
bash -ex /packages/keeper_test.sh""",
}
return test_install(image, tests)
def test_install(image: DockerImage, tests: Dict[str, str]) -> TestResults:
test_results = [] # type: TestResults
for name, command in tests.items():
stopwatch = Stopwatch()
container_name = name.lower().replace(" ", "_").replace("/", "_")
log_file = TEMP_PATH / f"{container_name}.log"
run_command = (
f"docker run --rm --privileged --detach --cap-add=SYS_PTRACE "
f"--volume={TEMP_PATH}:/packages {image}"
)
logging.info("Running docker container: `%s`", run_command)
container_id = subprocess.check_output(
run_command, shell=True, encoding="utf-8"
).strip()
(TEMP_PATH / "install.sh").write_text(command)
install_command = f"docker exec {container_id} bash -ex /packages/install.sh"
with TeePopen(install_command, log_file) as process:
retcode = process.wait()
if retcode == 0:
status = SUCCESS
else:
status = FAILURE
subprocess.check_call(f"docker kill -s 9 {container_id}", shell=True)
test_results.append(
TestResult(name, status, stopwatch.duration_seconds, [log_file])
)
return test_results
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="The script to check if the packages are able to install",
)
parser.add_argument(
"check_name",
help="check name, used to download the packages",
)
parser.add_argument("--download", default=True, help=argparse.SUPPRESS)
parser.add_argument(
"--no-download",
dest="download",
action="store_false",
default=argparse.SUPPRESS,
help="if set, the packages won't be downloaded, useful for debug",
)
parser.add_argument("--deb", default=True, help=argparse.SUPPRESS)
parser.add_argument(
"--no-deb",
dest="deb",
action="store_false",
default=argparse.SUPPRESS,
help="if set, the deb packages won't be checked",
)
parser.add_argument("--rpm", default=True, help=argparse.SUPPRESS)
parser.add_argument(
"--no-rpm",
dest="rpm",
action="store_false",
default=argparse.SUPPRESS,
help="if set, the rpm packages won't be checked",
)
parser.add_argument("--tgz", default=True, help=argparse.SUPPRESS)
parser.add_argument(
"--no-tgz",
dest="tgz",
action="store_false",
default=argparse.SUPPRESS,
help="if set, the tgz packages won't be checked",
)
return parser.parse_args()
def main():
logging.basicConfig(level=logging.INFO)
stopwatch = Stopwatch()
args = parse_args()
TEMP_PATH.mkdir(parents=True, exist_ok=True)
pr_info = PRInfo()
if CI:
gh = Github(get_best_robot_token(), per_page=100)
atexit.register(update_mergeable_check, gh, pr_info, args.check_name)
rerun_helper = RerunHelper(gh, pr_info, args.check_name)
if rerun_helper.is_already_finished_by_status():
logging.info(
"Check is already finished according to github status, exiting"
)
sys.exit(0)
docker_images = {
name: get_image_with_version(REPORTS_PATH, name)
for name in (RPM_IMAGE, DEB_IMAGE)
}
prepare_test_scripts()
if args.download:
def filter_artifacts(path: str) -> bool:
return (
path.endswith(".deb")
or path.endswith(".rpm")
or path.endswith(".tgz")
or path.endswith("/clickhouse")
)
download_builds_filter(
args.check_name, REPORTS_PATH, TEMP_PATH, filter_artifacts
)
test_results = [] # type: TestResults
if args.deb:
test_results.extend(test_install_deb(docker_images[DEB_IMAGE]))
if args.rpm:
test_results.extend(test_install_rpm(docker_images[RPM_IMAGE]))
if args.tgz:
test_results.extend(test_install_tgz(docker_images[DEB_IMAGE]))
test_results.extend(test_install_tgz(docker_images[RPM_IMAGE]))
state = SUCCESS
description = "Packages installed successfully"
if FAILURE in (result.status for result in test_results):
state = FAILURE
description = "Failed to install packages: " + ", ".join(
result.name for result in test_results
)
s3_helper = S3Helper()
report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
[],
args.check_name,
)
print(f"::notice ::Report url: {report_url}")
if not CI:
return
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, args.check_name, test_results)
if len(description) >= 140:
description = description[:136] + "..."
post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
args.check_name,
)
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
if state == FAILURE:
sys.exit(1)
if __name__ == "__main__":
main()
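A minimal sketch (Python) of the paired-flag pattern used in parse_args above: each --no-X flag stores False into the destination whose default is True, so omitting both flags leaves the corresponding package check enabled. Locally, the flags suggest runs like python3 install_check.py 'Install packages (amd64)' --no-download to reuse packages already placed in TEMP_PATH.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--deb", default=True, help=argparse.SUPPRESS)
parser.add_argument(
    "--no-deb",
    dest="deb",
    action="store_false",
    default=argparse.SUPPRESS,
    help="if set, the deb packages won't be checked",
)

print(parser.parse_args([]).deb)            # True
print(parser.parse_args(["--no-deb"]).deb)  # False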

View File

@ -108,13 +108,10 @@ def main():
stopwatch = Stopwatch()
temp_path = TEMP_PATH
reports_path = REPORTS_PATH
check_name = sys.argv[1]
if not os.path.exists(temp_path):
os.makedirs(temp_path)
if not os.path.exists(TEMP_PATH):
os.makedirs(TEMP_PATH)
pr_info = PRInfo()
@ -127,14 +124,14 @@ def main():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
docker_image = get_image_with_version(reports_path, IMAGE_NAME)
docker_image = get_image_with_version(REPORTS_PATH, IMAGE_NAME)
download_unit_tests(check_name, reports_path, temp_path)
download_unit_tests(check_name, REPORTS_PATH, TEMP_PATH)
tests_binary_path = os.path.join(temp_path, "unit_tests_dbms")
tests_binary_path = os.path.join(TEMP_PATH, "unit_tests_dbms")
os.chmod(tests_binary_path, 0o777)
test_output = os.path.join(temp_path, "test_output")
test_output = os.path.join(TEMP_PATH, "test_output")
if not os.path.exists(test_output):
os.makedirs(test_output)
@ -151,7 +148,7 @@ def main():
else:
logging.info("Run failed")
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {TEMP_PATH}", shell=True)
s3_helper = S3Helper()
state, description, test_results, additional_logs = process_results(test_output)

View File

@ -449,21 +449,31 @@ class FailureReason(enum.Enum):
INTERNAL_ERROR = "Test internal error: "
def threshold_generator(always_on_prob, always_off_prob, min_val, max_val):
def gen():
tmp = random.random()
if tmp <= always_on_prob:
return min_val
if tmp <= always_on_prob + always_off_prob:
return max_val
if isinstance(min_val, int) and isinstance(max_val, int):
return random.randint(min_val, max_val)
else:
return random.uniform(min_val, max_val)
return gen
class SettingsRandomizer:
settings = {
"max_insert_threads": lambda: 0
if random.random() < 0.5
else random.randint(1, 16),
"group_by_two_level_threshold": lambda: 1
if random.random() < 0.1
else 2**60
if random.random() < 0.11
else 100000,
"group_by_two_level_threshold_bytes": lambda: 1
if random.random() < 0.1
else 2**60
if random.random() < 0.11
else 50000000,
"group_by_two_level_threshold": threshold_generator(0.2, 0.2, 1, 1000000),
"group_by_two_level_threshold_bytes": threshold_generator(
0.2, 0.2, 1, 50000000
),
"distributed_aggregation_memory_efficient": lambda: random.randint(0, 1),
"fsync_metadata": lambda: random.randint(0, 1),
"output_format_parallel_formatting": lambda: random.randint(0, 1),
@ -480,17 +490,15 @@ class SettingsRandomizer:
"read_in_order_two_level_merge_threshold": lambda: random.randint(0, 100),
"optimize_aggregation_in_order": lambda: random.randint(0, 1),
"aggregation_in_order_max_block_bytes": lambda: random.randint(0, 50000000),
"min_compress_block_size": lambda: random.randint(1, 1048576 * 3),
"max_compress_block_size": lambda: random.randint(1, 1048576 * 3),
"use_uncompressed_cache": lambda: random.randint(0, 1),
"min_bytes_to_use_direct_io": lambda: 0
if random.random() < 0.5
else 1
if random.random() < 0.2
else random.randint(1, 1024 * 1024 * 1024),
"min_bytes_to_use_mmap_io": lambda: 0
if random.random() < 0.5
else 1
if random.random() < 0.2
else random.randint(1, 1024 * 1024 * 1024),
"min_bytes_to_use_direct_io": threshold_generator(
0.2, 0.5, 1, 10 * 1024 * 1024 * 1024
),
"min_bytes_to_use_mmap_io": threshold_generator(
0.2, 0.5, 1, 10 * 1024 * 1024 * 1024
),
"local_filesystem_read_method": lambda: random.choice(
["read", "pread", "mmap", "pread_threadpool", "io_uring"]
),
@ -514,6 +522,39 @@ class SettingsRandomizer:
return random_settings
class MergeTreeSettingsRandomizer:
settings = {
# Temporary disable due to large number of failures. TODO: fix.
# "ratio_of_defaults_for_sparse_serialization": threshold_generator(
# 0.1, 0.6, 0.0, 1.0
# ),
"prefer_fetch_merged_part_size_threshold": threshold_generator(
0.2, 0.5, 1, 10 * 1024 * 1024 * 1024
),
"vertical_merge_algorithm_min_rows_to_activate": threshold_generator(
0.4, 0.4, 1, 1000000
),
"vertical_merge_algorithm_min_columns_to_activate": threshold_generator(
0.4, 0.4, 1, 100
),
"min_merge_bytes_to_use_direct_io": threshold_generator(
0.25, 0.25, 1, 10 * 1024 * 1024 * 1024
),
"index_granularity_bytes": lambda: random.randint(1024, 30 * 1024 * 1024),
"merge_max_block_size": lambda: random.randint(1, 8192 * 3),
"index_granularity": lambda: random.randint(1, 65536),
"min_bytes_for_wide_part": threshold_generator(0.3, 0.3, 0, 1024 * 1024 * 1024),
}
@staticmethod
def get_random_settings(args):
random_settings = []
for setting, generator in MergeTreeSettingsRandomizer.settings.items():
if setting not in args.changed_merge_tree_settings:
random_settings.append(f"{setting}={generator()}")
return random_settings
class TestResult:
def __init__(
self,
@ -618,41 +659,48 @@ class TestCase:
return testcase_args
def cli_random_settings(self) -> str:
return " ".join([f"--{setting}" for setting in self.random_settings])
@staticmethod
def cli_format_settings(settings_list) -> str:
return " ".join([f"--{setting}" for setting in settings_list])
def add_random_settings(self, args, client_options):
if self.tags and "no-random-settings" in self.tags:
return client_options
if args.no_random_settings:
return client_options
def has_show_create_table_in_test(self):
return not subprocess.call(["grep", "-iq", "show create", self.case_file])
if len(self.base_url_params) == 0:
os.environ["CLICKHOUSE_URL_PARAMS"] = "&".join(self.random_settings)
else:
os.environ["CLICKHOUSE_URL_PARAMS"] = (
self.base_url_params + "&" + "&".join(self.random_settings)
def add_random_settings(self, client_options):
new_options = ""
if self.randomize_settings:
if len(self.base_url_params) == 0:
os.environ["CLICKHOUSE_URL_PARAMS"] = "&".join(self.random_settings)
else:
os.environ["CLICKHOUSE_URL_PARAMS"] = (
self.base_url_params + "&" + "&".join(self.random_settings)
)
new_options += f" {self.cli_format_settings(self.random_settings)}"
if self.randomize_merge_tree_settings:
new_options += f" --allow_merge_tree_settings {self.cli_format_settings(self.merge_tree_random_settings)}"
if new_options != "":
new_options += " --allow_repeated_settings"
os.environ["CLICKHOUSE_CLIENT_OPT"] = (
self.base_client_options + new_options + " "
)
new_options = f" --allow_repeated_settings {self.cli_random_settings()}"
os.environ["CLICKHOUSE_CLIENT_OPT"] = (
self.base_client_options + new_options + " "
)
return client_options + new_options
def remove_random_settings_from_env(self):
os.environ["CLICKHOUSE_URL_PARAMS"] = self.base_url_params
os.environ["CLICKHOUSE_CLIENT_OPT"] = self.base_client_options
def add_info_about_settings(self, args, description):
if self.tags and "no-random-settings" in self.tags:
return description
if args.no_random_settings:
return description
def add_info_about_settings(self, description):
if self.randomize_settings:
description += f"\nSettings used in the test: {self.cli_format_settings(self.random_settings)}"
if self.randomize_merge_tree_settings:
description += f"\n\nMergeTree settings used in test: {self.cli_format_settings(self.merge_tree_random_settings)}"
return (
f"{description}\nSettings used in the test: {self.cli_random_settings()}\n"
)
return description + "\n"
def __init__(self, suite, case: str, args, is_concurrent: bool):
self.case: str = case # case file name
@ -676,12 +724,40 @@ class TestCase:
self.testcase_args = None
self.runs_count = 0
self.random_settings = SettingsRandomizer.get_random_settings()
has_no_random_settings_tag = self.tags and "no-random-settings" in self.tags
self.randomize_settings = not (
args.no_random_settings or has_no_random_settings_tag
)
has_no_random_merge_tree_settings_tag = (
self.tags and "no-random-merge-tree-settings" in self.tags
)
# If the test contains SHOW CREATE TABLE, do not randomize MergeTree settings:
# they would be added to the table definition and the test would fail
self.randomize_merge_tree_settings = not (
args.no_random_merge_tree_settings
or has_no_random_settings_tag
or has_no_random_merge_tree_settings_tag
or self.has_show_create_table_in_test()
)
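# Illustrative note, not part of this change: randomized MergeTree settings end up
# in the table definition, so a test running SHOW CREATE TABLE would print output
# such as "... ENGINE = MergeTree ORDER BY k SETTINGS index_granularity = 4096"
# (the setting name and value here are hypothetical) and stop matching its
# .reference file, which is why such tests are detected and excluded above.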
if self.randomize_settings:
self.random_settings = SettingsRandomizer.get_random_settings()
if self.randomize_merge_tree_settings:
self.merge_tree_random_settings = (
MergeTreeSettingsRandomizer.get_random_settings(args)
)
self.base_url_params = (
os.environ["CLICKHOUSE_URL_PARAMS"]
if "CLICKHOUSE_URL_PARAMS" in os.environ
else ""
)
self.base_client_options = (
os.environ["CLICKHOUSE_CLIENT_OPT"]
if "CLICKHOUSE_CLIENT_OPT" in os.environ
@ -1136,7 +1212,7 @@ class TestCase:
self.testcase_args = self.configure_testcase_args(
args, self.case_file, suite.suite_tmp_path
)
client_options = self.add_random_settings(args, client_options)
client_options = self.add_random_settings(client_options)
proc, stdout, stderr, debug_log, total_time = self.run_single_test(
server_logs_level, client_options
)
@ -1149,9 +1225,7 @@ class TestCase:
result.description = result.description.replace('\0', '')
if result.status == TestStatus.FAIL:
result.description = self.add_info_about_settings(
args, result.description
)
result.description = self.add_info_about_settings(result.description)
return result
except KeyboardInterrupt as e:
raise e
@ -1162,7 +1236,7 @@ class TestCase:
FailureReason.INTERNAL_QUERY_FAIL,
0.0,
self.add_info_about_settings(
args, self.get_description_from_exception_info(sys.exc_info())
self.get_description_from_exception_info(sys.exc_info())
),
)
except (ConnectionError, http.client.ImproperConnectionState):
@ -1172,7 +1246,7 @@ class TestCase:
FailureReason.SERVER_DIED,
0.0,
self.add_info_about_settings(
args, self.get_description_from_exception_info(sys.exc_info())
self.get_description_from_exception_info(sys.exc_info())
),
)
except Exception:
@ -1680,6 +1754,19 @@ def collect_build_flags(args):
return result
def collect_changed_merge_tree_settings(args):
changed_settings = (
clickhouse_execute(
args,
"SELECT name FROM system.merge_tree_settings WHERE changed",
)
.strip()
.splitlines()
)
return list(map(lambda s: s.decode(), changed_settings))
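
For context (not part of this change): clickhouse_execute returns the raw bytes of the query result, which is why each line is decoded above. A tiny self-contained illustration with a made-up server reply:

    raw = b"index_granularity\nmin_bytes_for_wide_part\n"  # hypothetical reply
    changed = raw.strip().splitlines()
    assert list(map(lambda s: s.decode(), changed)) == [
        "index_granularity",
        "min_bytes_for_wide_part",
    ]
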
def check_table_column(args, database, table, column):
return (
int(
@ -1984,6 +2071,7 @@ def main(args):
raise Exception(msg)
args.build_flags = collect_build_flags(args)
args.changed_merge_tree_settings = collect_changed_merge_tree_settings(args)
args.suppport_system_processes_is_all_data_sent = check_table_column(
args, "system", "processes", "is_all_data_sent"
)
@ -2328,7 +2416,12 @@ if __name__ == "__main__":
default=False,
help="Disable settings randomization",
)
parser.add_argument(
"--no-random-merge-tree-settings",
action="store_true",
default=False,
help="Disable MergeTree settings randomization",
)
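
For context (not part of this change): the new flag is independent of the existing --no-random-settings option; argparse maps the dashes to underscores, so the test-case constructor above can check both attributes separately. A small sketch:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--no-random-settings", action="store_true", default=False)
    parser.add_argument(
        "--no-random-merge-tree-settings", action="store_true", default=False
    )
    opts = parser.parse_args(["--no-random-merge-tree-settings"])
    assert opts.no_random_merge_tree_settings and not opts.no_random_settings
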
parser.add_argument(
"--run-by-hash-num",
type=int,


@ -2856,7 +2856,10 @@ class ClickHouseCluster:
SANITIZER_SIGN, from_host=True, filename="stderr.log"
):
sanitizer_assert_instance = instance.grep_in_log(
SANITIZER_SIGN, from_host=True, filename="stderr.log"
SANITIZER_SIGN,
from_host=True,
filename="stderr.log",
after=1000,
)
logging.error(
"Sanitizer in instance %s log %s",
@ -2897,8 +2900,8 @@ class ClickHouseCluster:
if sanitizer_assert_instance is not None:
raise Exception(
"Sanitizer assert found in {} for instance {}".format(
self.docker_logs_path, sanitizer_assert_instance
"Sanitizer assert found for instance {}".format(
sanitizer_assert_instance
)
)
if fatal_log is not None:
@ -3652,15 +3655,21 @@ class ClickHouseInstance:
)
return len(result) > 0
def grep_in_log(self, substring, from_host=False, filename="clickhouse-server.log"):
def grep_in_log(
self, substring, from_host=False, filename="clickhouse-server.log", after=None
):
logging.debug("grep in log called %s", substring)
if after is not None:
after_opt = "-A{}".format(after)
else:
after_opt = ""
if from_host:
# We check first that the file exists but want to look for all rotated logs as well
result = subprocess_check_call(
[
"bash",
"-c",
f'[ -f {self.logs_dir}/{filename} ] && zgrep -a "{substring}" {self.logs_dir}/{filename}* || true',
f'[ -f {self.logs_dir}/{filename} ] && zgrep {after_opt} -a "{substring}" {self.logs_dir}/{filename}* || true',
]
)
else:
@ -3668,7 +3677,7 @@ class ClickHouseInstance:
[
"bash",
"-c",
f'[ -f /var/log/clickhouse-server/{filename} ] && zgrep -a "{substring}" /var/log/clickhouse-server/{filename}* || true',
f'[ -f /var/log/clickhouse-server/{filename} ] && zgrep {after_opt} -a "{substring}" /var/log/clickhouse-server/{filename}* || true',
]
)
logging.debug("grep result %s", result)


@ -2953,6 +2953,7 @@ def test_rabbitmq_address(rabbitmq_cluster):
instance2.query("drop table rabbit_out sync")
@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming())")
def test_format_with_prefix_and_suffix(rabbitmq_cluster):
instance.query(
"""
@ -3001,6 +3002,7 @@ def test_format_with_prefix_and_suffix(rabbitmq_cluster):
)
@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming())")
def test_max_rows_per_message(rabbitmq_cluster):
num_rows = 5
@ -3073,6 +3075,7 @@ def test_max_rows_per_message(rabbitmq_cluster):
assert result == "0\t0\n10\t100\n20\t200\n30\t300\n40\t400\n"
@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming())")
def test_row_based_formats(rabbitmq_cluster):
num_rows = 10
@ -3169,6 +3172,7 @@ def test_row_based_formats(rabbitmq_cluster):
assert result == expected
@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming())")
def test_block_based_formats_1(rabbitmq_cluster):
instance.query(
"""
@ -3230,6 +3234,7 @@ def test_block_based_formats_1(rabbitmq_cluster):
]
@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming())")
def test_block_based_formats_2(rabbitmq_cluster):
num_rows = 100


@ -0,0 +1,12 @@
<test>
<query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(Int128)) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
<query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(UInt128)) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
<query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(Int256)) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
<query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(UInt256)) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
<query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(Decimal32(0))) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
<query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(Decimal64(0))) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
<query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(Decimal128(0))) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
<query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(Decimal256(0))) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
</test>


@ -0,0 +1,12 @@
<test>
<query>with cast([1,2,3,4] as Array(Int128)) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
<query>with cast([1,2,3,4] as Array(UInt128)) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
<query>with cast([1,2,3,4] as Array(Int256)) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
<query>with cast([1,2,3,4] as Array(UInt256)) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
<query>with cast([1,2,3,4] as Array(Decimal32(0))) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
<query>with cast([1,2,3,4] as Array(Decimal64(0))) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
<query>with cast([1,2,3,4] as Array(Decimal128(0))) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
<query>with cast([1,2,3,4] as Array(Decimal256(0))) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
</test>


@ -44,7 +44,7 @@ select 100, max2((select count() from logs where level = 'Warning' and message_f
group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.005);
-- Same as above for Error
select 110, max2((select count() from logs where level = 'Warning' group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.01);
select 110, max2((select count() from logs where level = 'Error' group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.01);
-- Avoid too noisy messages: limit the number of messages with high frequency
select 120, max2(count(), 3) from (select count() / (select count() from logs) as freq, message_format_string from logs group by message_format_string having freq > 0.10);


@ -1,4 +1,5 @@
#!/usr/bin/env bash
# Tags: no-random-merge-tree-settings
set -e
@ -7,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes"
$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=1, index_granularity_bytes=0, min_bytes_for_wide_part = 0"
$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=1, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0"
$CLICKHOUSE_CLIENT -q "INSERT INTO preferred_block_size_bytes (s) SELECT '16_bytes_-_-_-_' AS s FROM system.numbers LIMIT 10, 90"
$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE preferred_block_size_bytes"
$CLICKHOUSE_CLIENT --preferred_block_size_bytes=26 -q "SELECT DISTINCT blockSize(), ignore(p, s) FROM preferred_block_size_bytes"
@ -18,7 +19,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes"
# PREWHERE using empty column
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS pbs"
$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=100, index_granularity_bytes=0, min_bytes_for_wide_part = 0"
$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=100, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0"
$CLICKHOUSE_CLIENT -q "INSERT INTO pbs (p, i, sa) SELECT toDate(i % 30) AS p, number AS i, ['a'] AS sa FROM system.numbers LIMIT 1000"
$CLICKHOUSE_CLIENT -q "ALTER TABLE pbs ADD COLUMN s UInt8 DEFAULT 0"
$CLICKHOUSE_CLIENT --preferred_block_size_bytes=100000 -q "SELECT count() FROM pbs PREWHERE s = 0"
@ -29,7 +30,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE pbs"
# Nullable PREWHERE
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere"
$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=8, index_granularity_bytes=0, min_bytes_for_wide_part = 0"
$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=8, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0"
$CLICKHOUSE_CLIENT -q "INSERT INTO nullable_prewhere SELECT toDate(0) AS p, if(number % 2 = 0, CAST(number AS Nullable(UInt64)), CAST(NULL AS Nullable(UInt64))) AS f, number as d FROM system.numbers LIMIT 1001"
$CLICKHOUSE_CLIENT -q "SELECT sum(d), sum(f), max(d) FROM nullable_prewhere PREWHERE NOT isNull(f)"
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere"


@ -25,15 +25,9 @@
1
1
0
1
0
1
1
0
-
1
1
1
-
(1,2) ((1,2),(3,4)) 1 1
-


@ -28,16 +28,10 @@ select 1 in (0 + 1, 1, toInt8(sin(5)));
select (0 + 1, 1, toInt8(sin(5))) in (0 + 1, 1, toInt8(sin(5)));
select identity(tuple(1)) in (tuple(1), tuple(2));
select identity(tuple(1)) in (tuple(0), tuple(2));
select identity(tuple(1)) in (identity(tuple(1)), tuple(2));
select identity(tuple(1)) in (identity(tuple(0)), tuple(2));
select identity(tuple(1)) in (identity(tuple(1)), identity(tuple(2)));
select identity(tuple(1)) in (identity(tuple(1)), identity(identity(tuple(2))));
select identity(tuple(1)) in (identity(tuple(0)), identity(identity(tuple(2))));
select '-';
select identity((1, 2)) in (1, 2);
select identity((1, 2)) in ((1, 2), (3, 4));
select identity((1, 2)) in ((1, 2), identity((3, 4)));
select '-';
select (1,2) as x, ((1,2),(3,4)) as y, 1 in x, x in y;
@ -50,4 +44,3 @@ select (1, 2) in (select (1, 2));
select identity(tuple(1)) in (select tuple(1));
select identity((1, 2)) in (select 1, 2);
select identity((1, 2)) in (select (1, 2));


@ -1,4 +1,5 @@
#!/usr/bin/env bash
# Tags: no-random-merge-tree-settings
#--------------------------------------------
# Description of test result:

Some files were not shown because too many files have changed in this diff.