From d57ffec72fb3a52b6d642270f3fc3907bcabbe0b Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 1 Jun 2023 13:45:00 +0000 Subject: [PATCH 01/64] Add signal handler for SIGQUIT --- src/Client/ClientBase.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 77a93a25e9b..29b2eb5ce1e 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -286,7 +286,7 @@ public: static Int32 cancelled_status() { return exit_after_signals.load(); } }; -/// This signal handler is set only for SIGINT. +/// This signal handler is set for SIGINT and SIGQUIT. void interruptSignalHandler(int signum) { if (QueryInterruptHandler::try_stop()) @@ -325,6 +325,9 @@ void ClientBase::setupSignalHandler() if (sigaction(SIGINT, &new_act, nullptr)) throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + + if (sigaction(SIGQUIT, &new_act, nullptr)) + throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); } From 985cd8fc8a0fdbe09a95132f1fb549825ece636f Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 26 Apr 2023 21:20:19 +0200 Subject: [PATCH 02/64] Improve events logging --- tests/ci/terminate_runner_lambda/app.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/ci/terminate_runner_lambda/app.py b/tests/ci/terminate_runner_lambda/app.py index c9192417575..5a78c8c8e39 100644 --- a/tests/ci/terminate_runner_lambda/app.py +++ b/tests/ci/terminate_runner_lambda/app.py @@ -160,7 +160,7 @@ def get_candidates_to_be_killed(event_data: dict) -> Dict[str, List[str]]: def main(access_token: str, event: dict) -> Dict[str, List[str]]: - print("Got event", json.dumps(event, sort_keys=True, indent=4)) + print("Got event", json.dumps(event, sort_keys=True).replace("\n", "")) to_kill_by_zone = how_many_instances_to_kill(event) instances_by_zone = get_candidates_to_be_killed(event) @@ -177,7 +177,8 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: total_to_kill += num_to_kill if num_to_kill > len(candidates): raise Exception( - f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}" + f"Required to kill {num_to_kill}, but have only {len(candidates)}" + f" candidates in AV {zone}" ) delete_for_av = [] # type: RunnerDescriptions @@ -207,7 +208,8 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: if len(delete_for_av) < num_to_kill: print( - f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}" + f"Checked all candidates for av {zone}, get to delete " + f"{len(delete_for_av)}, but still cannot get required {num_to_kill}" ) instances_to_kill += [runner.name for runner in delete_for_av] From 7bf9089dcd34e0c29b1f951066136fec1d990372 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 27 Apr 2023 12:00:22 +0200 Subject: [PATCH 03/64] Increase access_token cached time --- tests/ci/terminate_runner_lambda/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/terminate_runner_lambda/app.py b/tests/ci/terminate_runner_lambda/app.py index 5a78c8c8e39..390375a34e4 100644 --- a/tests/ci/terminate_runner_lambda/app.py +++ b/tests/ci/terminate_runner_lambda/app.py @@ -64,7 +64,7 @@ cached_token = CachedToken(0, "") def get_cached_access_token() -> str: - if time.time() - 500 < cached_token.time: + if time.time() - 550 < cached_token.time: return cached_token.value private_key, app_id = get_key_and_app_from_aws() payload = { From bf9b563e0b2c3b1933405aef34d904afc7ade57f Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 27 Apr 2023 16:54:37 +0200 Subject: [PATCH 04/64] Improve caching mechanism for token, add cached instances --- tests/ci/terminate_runner_lambda/app.py | 40 ++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/tests/ci/terminate_runner_lambda/app.py b/tests/ci/terminate_runner_lambda/app.py index 390375a34e4..bf883880c8d 100644 --- a/tests/ci/terminate_runner_lambda/app.py +++ b/tests/ci/terminate_runner_lambda/app.py @@ -58,14 +58,19 @@ def get_access_token(jwt_token: str, installation_id: int) -> str: class CachedToken: time: int value: str + updating: bool = False cached_token = CachedToken(0, "") def get_cached_access_token() -> str: - if time.time() - 550 < cached_token.time: + if time.time() - 550 < cached_token.time or cached_token.updating: return cached_token.value + # Indicate that the value is updating now, so the cached value can be + # used. The first setting and close-to-ttl are not counted as update + if cached_token.time != 0 or time.time() - 590 < cached_token.time: + cached_token.updating = True private_key, app_id = get_key_and_app_from_aws() payload = { "iat": int(time.time()) - 60, @@ -77,9 +82,42 @@ def get_cached_access_token() -> str: installation_id = get_installation_id(encoded_jwt) cached_token.time = int(time.time()) cached_token.value = get_access_token(encoded_jwt, installation_id) + cached_token.updating = False return cached_token.value +@dataclass +class CachedInstances: + time: int + value: dict + updating: bool = False + + +cached_instances = CachedInstances(0, {}) + + +def get_cached_instances() -> dict: + """return cached instances description with updating it once per five minutes""" + if time.time() - 250 < cached_instances.time or cached_instances.updating: + return cached_instances.value + # Indicate that the value is updating now, so the cached value can be + # used. The first setting and close-to-ttl are not counted as update + if cached_instances.time != 0 or time.time() - 300 < cached_instances.time: + cached_instances.updating = True + ec2_client = boto3.client("ec2") + instances_response = ec2_client.describe_instances( + Filters=[{"Name": "instance-state-name", "Values": ["running"]}] + ) + cached_instances.time = int(time.time()) + cached_instances.value = { + instance["InstanceId"]: instance + for reservation in instances_response["Reservations"] + for instance in reservation["Instances"] + } + cached_instances.updating = False + return cached_instances.value + + RunnerDescription = namedtuple( "RunnerDescription", ["id", "name", "tags", "offline", "busy"] ) From 855afb56f913d3d7ba6dcc7f992b59cfaf5cb02e Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 27 Apr 2023 16:56:17 +0200 Subject: [PATCH 05/64] Get instances for the region to not kill a fresh one --- tests/ci/terminate_runner_lambda/app.py | 53 ++++++++++++++++++------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/tests/ci/terminate_runner_lambda/app.py b/tests/ci/terminate_runner_lambda/app.py index bf883880c8d..e0164bc58c0 100644 --- a/tests/ci/terminate_runner_lambda/app.py +++ b/tests/ci/terminate_runner_lambda/app.py @@ -198,11 +198,37 @@ def get_candidates_to_be_killed(event_data: dict) -> Dict[str, List[str]]: def main(access_token: str, event: dict) -> Dict[str, List[str]]: + start = time.time() print("Got event", json.dumps(event, sort_keys=True).replace("\n", "")) to_kill_by_zone = how_many_instances_to_kill(event) instances_by_zone = get_candidates_to_be_killed(event) + # Getting ASG and instances' descriptions from the API + # We don't kill instances that alive for less than 10 minutes, since they + # could be not in the GH active runners yet + print(f"Check other hosts from the same ASG {event['AutoScalingGroupName']}") + asg_client = boto3.client("autoscaling") + as_groups_response = asg_client.describe_auto_scaling_groups( + AutoScalingGroupNames=[event["AutoScalingGroupName"]] + ) + assert len(as_groups_response["AutoScalingGroups"]) == 1 + asg = as_groups_response["AutoScalingGroups"][0] + asg_instance_ids = [instance["InstanceId"] for instance in asg["Instances"]] + instance_descriptions = get_cached_instances() + # The instances launched less than 10 minutes ago + immune_ids = [ + instance["InstanceId"] + for instance in instance_descriptions.values() + if start - instance["LaunchTime"].timestamp() < 600 + ] + # if the ASG's instance ID not in instance_descriptions, it's most probably + # is not cached yet, so we must mark it as immuned + immune_ids.extend( + iid for iid in asg_instance_ids if iid not in instance_descriptions + ) + print("Time spent on the requests to AWS: ", time.time() - start) runners = list_runners(access_token) + runner_ids = set(runner.name for runner in runners) # We used to delete potential hosts to terminate from GitHub runners pool, # but the documentation states: # --- Returning an instance first in the response data does not guarantee its termination @@ -221,13 +247,17 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: delete_for_av = [] # type: RunnerDescriptions for candidate in candidates: - if candidate not in set(runner.name for runner in runners): + if candidate in immune_ids: + print( + f"Candidate {candidate} started less than 10 minutes ago, won't touch a child" + ) + break + if candidate not in runner_ids: print( f"Candidate {candidate} was not in runners list, simply delete it" ) instances_to_kill.append(candidate) - - for candidate in candidates: + break if len(delete_for_av) + len(instances_to_kill) == num_to_kill: break if candidate in instances_to_kill: @@ -253,16 +283,11 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: instances_to_kill += [runner.name for runner in delete_for_av] if len(instances_to_kill) < total_to_kill: - print(f"Check other hosts from the same ASG {event['AutoScalingGroupName']}") - client = boto3.client("autoscaling") - as_groups = client.describe_auto_scaling_groups( - AutoScalingGroupNames=[event["AutoScalingGroupName"]] - ) - assert len(as_groups["AutoScalingGroups"]) == 1 - asg = as_groups["AutoScalingGroups"][0] - for instance in asg["Instances"]: + for instance in asg_instance_ids: + if instance in immune_ids: + 
continue for runner in runners: - if runner.name == instance["InstanceId"] and not runner.busy: + if runner.name == instance and not runner.busy: print(f"Runner {runner.name} is not busy and can be deleted") instances_to_kill.append(runner.name) @@ -270,9 +295,9 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: print("Got enough instances to kill") break - print("Got instances to kill: ", ", ".join(instances_to_kill)) response = {"InstanceIDs": instances_to_kill} - print(response) + print("Got instances to kill: ", response) + print("Time spent on the request: ", time.time() - start) return response From db029384110be42a55e8420e6a13091cd2dca164 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 4 May 2023 12:20:57 +0200 Subject: [PATCH 06/64] Do not count unfinished tasks with conclusion=None --- tests/ci/workflow_jobs_lambda/app.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/ci/workflow_jobs_lambda/app.py b/tests/ci/workflow_jobs_lambda/app.py index bc8e1212be5..c4ce68c3f8e 100644 --- a/tests/ci/workflow_jobs_lambda/app.py +++ b/tests/ci/workflow_jobs_lambda/app.py @@ -257,6 +257,7 @@ def handler(event: dict, context: Any) -> dict: else: event_data = json.loads(event["body"]) + logging.info("Got the next raw event from the github hook: %s", event_data) repo = event_data["repository"] try: wf_job = event_data["workflow_job"] @@ -265,6 +266,9 @@ def handler(event: dict, context: Any) -> dict: logging.error("The event data: %s", event) logging.error("The context data: %s", context) + # We record only finished steps + steps = len([step for step in wf_job["steps"] if step["conclusion"] is not None]) + workflow_job = WorkflowJob( wf_job["id"], wf_job["run_id"], @@ -281,7 +285,7 @@ def handler(event: dict, context: Any) -> dict: wf_job["started_at"], wf_job["completed_at"] or "1970-01-01T00:00:00", # nullable date wf_job["name"], - len(wf_job["steps"]), + steps, wf_job["check_run_url"], wf_job["labels"], wf_job["runner_id"] or 0, # nullable From 27941b4d2603a1308978bdc32c6db4d6ed0da7ef Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 22 May 2023 17:57:12 +0200 Subject: [PATCH 07/64] Decrease the time window for autoscale_runners_lambda --- tests/ci/autoscale_runners_lambda/app.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py index cbc9f4f8901..3fbab0d13dc 100644 --- a/tests/ci/autoscale_runners_lambda/app.py +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -22,10 +22,13 @@ RUNNER_TYPE_LABELS = [ "style-checker-aarch64", ] +### Update comment on the change ### # 4 HOUR - is a balance to get the most precise values # - Our longest possible running check is around 5h on the worst scenario # - The long queue won't be wiped out and replaced, so the measurmenet is fine # - If the data is spoiled by something, we are from the bills perspective +# Changed it to 3 HOUR: in average we have 1h tasks, but p90 is around 2h. 
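(An aside on how this windowed query is consumed: the lambda feeds it through the module-level ClickHouse client and gets back one row per status/labels group. A minimal sketch that reuses the CH_CLIENT and QUEUE_QUERY objects defined in this file; the example row is illustrative, not real data.)

queues = CH_CLIENT.select_json_each_row("default", QUEUE_QUERY)
for row in queues:
    # e.g. {"status": "queued", "length": 12, "labels": ["self-hosted", "builder"]}
    print(row["status"], row["length"], row["labels"])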
+# With 4h we have too much wasted computing time in case of issues with DB QUEUE_QUERY = f"""SELECT last_status AS status, toUInt32(count()) AS length, @@ -40,7 +43,7 @@ FROM FROM default.workflow_jobs WHERE has(labels, 'self-hosted') AND hasAny({RUNNER_TYPE_LABELS}, labels) - AND started_at > now() - INTERVAL 4 HOUR + AND started_at > now() - INTERVAL 3 HOUR GROUP BY ALL HAVING last_status IN ('in_progress', 'queued') ) From 484c91c47e3aa8365eec5ae29d03fb66a8372bb0 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 22 May 2023 20:39:22 +0200 Subject: [PATCH 08/64] Add DRY_RUN and configurable PY_VERSION to lambda deployment --- .../ci/team_keys_lambda/build_and_deploy_archive.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh index 4aee85c588a..f35d6456cd3 100644 --- a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh +++ b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh @@ -5,11 +5,17 @@ WORKDIR=$(dirname "$0") WORKDIR=$(readlink -f "${WORKDIR}") cd "$WORKDIR" -PY_VERSION=3.10 +# Do not deploy the lambda to AWS +DRY_RUN=${DRY_RUN:-} +# Python runtime to install dependencies +PY_VERSION=${PY_VERSION:-3.10} PY_EXEC="python${PY_VERSION}" +# Image to build the lambda zip package DOCKER_IMAGE="python:${PY_VERSION}-slim" LAMBDA_NAME=$(basename "$WORKDIR") +# Rename the_lambda_name directory to the-lambda-name lambda in AWS LAMBDA_NAME=${LAMBDA_NAME//_/-} +# The name of directory with lambda code PACKAGE=lambda-package rm -rf "$PACKAGE" "$PACKAGE".zip mkdir "$PACKAGE" @@ -28,4 +34,6 @@ if [ -f requirements.txt ]; then fi ( cd "$PACKAGE" && zip -9 -r ../"$PACKAGE".zip . ) -aws lambda update-function-code --function-name "$LAMBDA_NAME" --zip-file fileb://"$PACKAGE".zip +if [ -z "$DRY_RUN" ]; then + aws lambda update-function-code --function-name "$LAMBDA_NAME" --zip-file fileb://"$PACKAGE".zip +fi From 7f08f218d9ee71ba52d011cabce2faea7492c5ca Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 22 May 2023 23:07:35 +0200 Subject: [PATCH 09/64] Create lambda_shared package for lambdas --- .github/workflows/pull_request.yml | 5 ++ tests/ci/autoscale_runners_lambda/app.py | 77 +++---------------- .../autoscale_runners_lambda_test.py | 2 +- .../ci/autoscale_runners_lambda/lambda_shared | 1 + .../autoscale_runners_lambda/requirements.txt | 2 +- tests/ci/lambda_shared_package/.gitignore | 2 + .../lambda_shared/__init__.py | 74 ++++++++++++++++++ tests/ci/lambda_shared_package/pyproject.toml | 13 ++++ tests/ci/lambda_shared_package/setup.cfg | 8 ++ .../build_and_deploy_archive.sh | 11 +-- 10 files changed, 121 insertions(+), 74 deletions(-) rename tests/ci/{ => autoscale_runners_lambda}/autoscale_runners_lambda_test.py (98%) create mode 120000 tests/ci/autoscale_runners_lambda/lambda_shared create mode 100644 tests/ci/lambda_shared_package/.gitignore create mode 100644 tests/ci/lambda_shared_package/lambda_shared/__init__.py create mode 100644 tests/ci/lambda_shared_package/pyproject.toml create mode 100644 tests/ci/lambda_shared_package/setup.cfg diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 506ed451b6d..afc08f3e637 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -46,7 +46,12 @@ jobs: - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" + echo "Testing the main ci directory" python3 -m unittest discover -s . 
-p '*_test.py' + for dir in *_lambda/; do + echo "Testing $dir" + python3 -m unittest discover -s "$dir" -p '*_test.py' + done DockerHubPushAarch64: needs: CheckLabels runs-on: [self-hosted, style-checker-aarch64] diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py index 3fbab0d13dc..ab09afb3aa8 100644 --- a/tests/ci/autoscale_runners_lambda/app.py +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -2,25 +2,19 @@ """The lambda to decrease/increase ASG desired capacity based on current queue""" -import json import logging -import time from dataclasses import dataclass from pprint import pformat from typing import Any, List, Literal, Optional, Tuple import boto3 # type: ignore -import requests # type: ignore -RUNNER_TYPE_LABELS = [ - "builder", - "func-tester", - "func-tester-aarch64", - "fuzzer-unit-tester", - "stress-tester", - "style-checker", - "style-checker-aarch64", -] +from lambda_shared import ( + CHException, + ClickHouseHelper, + RUNNER_TYPE_LABELS, + get_parameter_from_ssm, +) ### Update comment on the change ### # 4 HOUR - is a balance to get the most precise values @@ -74,61 +68,7 @@ def get_scales(runner_type: str) -> Tuple[int, int]: return scale_down, scale_up -### VENDORING -def get_parameter_from_ssm(name, decrypt=True, client=None): - if not client: - client = boto3.client("ssm", region_name="us-east-1") - return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"] - - -class CHException(Exception): - pass - - -class ClickHouseHelper: - def __init__( - self, - url: Optional[str] = None, - user: Optional[str] = None, - password: Optional[str] = None, - ): - self.url = url - self.auth = {} - if user: - self.auth["X-ClickHouse-User"] = user - if password: - self.auth["X-ClickHouse-Key"] = password - - def _select_and_get_json_each_row(self, db, query): - params = { - "database": db, - "query": query, - "default_format": "JSONEachRow", - } - for i in range(5): - response = None - try: - response = requests.get(self.url, params=params, headers=self.auth) - response.raise_for_status() - return response.text - except Exception as ex: - logging.warning("Cannot fetch data with exception %s", str(ex)) - if response: - logging.warning("Reponse text %s", response.text) - time.sleep(0.1 * i) - - raise CHException("Cannot fetch data from clickhouse") - - def select_json_each_row(self, db, query): - text = self._select_and_get_json_each_row(db, query) - result = [] - for line in text.split("\n"): - if line: - result.append(json.loads(line)) - return result - - -CH_CLIENT = ClickHouseHelper(get_parameter_from_ssm("clickhouse-test-stat-url"), "play") +CH_CLIENT = None # type: Optional[ClickHouseHelper] def set_capacity( @@ -222,6 +162,9 @@ def main(dry_run: bool = True) -> None: asg_client = boto3.client("autoscaling") try: global CH_CLIENT + CH_CLIENT = CH_CLIENT or ClickHouseHelper( + get_parameter_from_ssm("clickhouse-test-stat-url"), "play" + ) queues = CH_CLIENT.select_json_each_row("default", QUEUE_QUERY) except CHException as ex: logging.exception( diff --git a/tests/ci/autoscale_runners_lambda_test.py b/tests/ci/autoscale_runners_lambda/autoscale_runners_lambda_test.py similarity index 98% rename from tests/ci/autoscale_runners_lambda_test.py rename to tests/ci/autoscale_runners_lambda/autoscale_runners_lambda_test.py index 8e3828f51c0..6772e33374c 100644 --- a/tests/ci/autoscale_runners_lambda_test.py +++ b/tests/ci/autoscale_runners_lambda/autoscale_runners_lambda_test.py @@ -4,7 +4,7 @@ import unittest from 
dataclasses import dataclass from typing import Any, List -from autoscale_runners_lambda.app import set_capacity, Queue +from app import set_capacity, Queue @dataclass diff --git a/tests/ci/autoscale_runners_lambda/lambda_shared b/tests/ci/autoscale_runners_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/autoscale_runners_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/autoscale_runners_lambda/requirements.txt b/tests/ci/autoscale_runners_lambda/requirements.txt index 3bcbe2dfd07..098e04a9798 100644 --- a/tests/ci/autoscale_runners_lambda/requirements.txt +++ b/tests/ci/autoscale_runners_lambda/requirements.txt @@ -1 +1 @@ -requests<2.30 +../lambda_shared_package diff --git a/tests/ci/lambda_shared_package/.gitignore b/tests/ci/lambda_shared_package/.gitignore new file mode 100644 index 00000000000..59d52651e06 --- /dev/null +++ b/tests/ci/lambda_shared_package/.gitignore @@ -0,0 +1,2 @@ +build +*.egg-info diff --git a/tests/ci/lambda_shared_package/lambda_shared/__init__.py b/tests/ci/lambda_shared_package/lambda_shared/__init__.py new file mode 100644 index 00000000000..c5ae4df9e17 --- /dev/null +++ b/tests/ci/lambda_shared_package/lambda_shared/__init__.py @@ -0,0 +1,74 @@ +"""The shared code and types for all our CI lambdas +It exists as __init__.py and lambda_shared/__init__.py to work both in local and venv""" + +import json +import logging +import time +from typing import List, Optional + +import boto3 # type: ignore +import requests # type: ignore + +RUNNER_TYPE_LABELS = [ + "builder", + "func-tester", + "func-tester-aarch64", + "fuzzer-unit-tester", + "stress-tester", + "style-checker", + "style-checker-aarch64", +] + + +### VENDORING +def get_parameter_from_ssm(name, decrypt=True, client=None): + if not client: + client = boto3.client("ssm", region_name="us-east-1") + return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"] + + +class CHException(Exception): + pass + + +class ClickHouseHelper: + def __init__( + self, + url: Optional[str] = None, + user: Optional[str] = None, + password: Optional[str] = None, + ): + self.url = url + self.auth = {} + if user: + self.auth["X-ClickHouse-User"] = user + if password: + self.auth["X-ClickHouse-Key"] = password + + def _select_and_get_json_each_row(self, db: str, query: str) -> str: + params = { + "database": db, + "query": query, + "default_format": "JSONEachRow", + } + for i in range(5): + response = None + try: + response = requests.get(self.url, params=params, headers=self.auth) + response.raise_for_status() + return response.text # type: ignore + except Exception as ex: + logging.warning("Cannot fetch data with exception %s", str(ex)) + if response: + logging.warning("Reponse text %s", response.text) + time.sleep(0.1 * i) + + raise CHException("Cannot fetch data from clickhouse") + + def select_json_each_row(self, db: str, query: str) -> List[dict]: # type: ignore + text = self._select_and_get_json_each_row(db, query) + result = [] + for line in text.split("\n"): + if line: + result.append(json.loads(line)) + return result diff --git a/tests/ci/lambda_shared_package/pyproject.toml b/tests/ci/lambda_shared_package/pyproject.toml new file mode 100644 index 00000000000..8b4b0a80948 --- /dev/null +++ b/tests/ci/lambda_shared_package/pyproject.toml @@ -0,0 +1,13 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = 
"lambda_shared" +version = "0.0.1" +dependencies = [ + "requests < 2.30", +] + +[tool.distutils.bdist_wheel] +universal = true diff --git a/tests/ci/lambda_shared_package/setup.cfg b/tests/ci/lambda_shared_package/setup.cfg new file mode 100644 index 00000000000..744280ae41b --- /dev/null +++ b/tests/ci/lambda_shared_package/setup.cfg @@ -0,0 +1,8 @@ +### This file exists for clear builds in docker ### +# without it the `build` directory wouldn't be # +# updated on the fly and will require manual clean # +[build] +build_base = /tmp/lambda_shared + +[egg_info] +egg_base = /tmp/ diff --git a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh index f35d6456cd3..89a2d514965 100644 --- a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh +++ b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh @@ -3,6 +3,7 @@ set -xeo pipefail WORKDIR=$(dirname "$0") WORKDIR=$(readlink -f "${WORKDIR}") +DIR_NAME=$(basename "$WORKDIR") cd "$WORKDIR" # Do not deploy the lambda to AWS @@ -12,9 +13,8 @@ PY_VERSION=${PY_VERSION:-3.10} PY_EXEC="python${PY_VERSION}" # Image to build the lambda zip package DOCKER_IMAGE="python:${PY_VERSION}-slim" -LAMBDA_NAME=$(basename "$WORKDIR") # Rename the_lambda_name directory to the-lambda-name lambda in AWS -LAMBDA_NAME=${LAMBDA_NAME//_/-} +LAMBDA_NAME=${DIR_NAME//_/-} # The name of directory with lambda code PACKAGE=lambda-package rm -rf "$PACKAGE" "$PACKAGE".zip @@ -23,8 +23,9 @@ cp app.py "$PACKAGE" if [ -f requirements.txt ]; then VENV=lambda-venv rm -rf "$VENV" lambda-package.zip - docker run --rm --user="${UID}" --volume="${WORKDIR}:/lambda" --workdir="/lambda" "${DOCKER_IMAGE}" \ - /bin/bash -c " + docker run --rm --user="${UID}" -e HOME=/tmp \ + --volume="${WORKDIR}/..:/ci" --workdir="/ci/${DIR_NAME}" "${DOCKER_IMAGE}" \ + /bin/bash -exc " '$PY_EXEC' -m venv '$VENV' && source '$VENV/bin/activate' && pip install -r requirements.txt @@ -35,5 +36,5 @@ fi ( cd "$PACKAGE" && zip -9 -r ../"$PACKAGE".zip . ) if [ -z "$DRY_RUN" ]; then - aws lambda update-function-code --function-name "$LAMBDA_NAME" --zip-file fileb://"$PACKAGE".zip + aws lambda update-function-code --function-name "$LAMBDA_NAME" --zip-file fileb://"$WORKDIR/$PACKAGE".zip fi From 0fa6a8416148a9f47f8d5c4e6a6bdb67992d2cd4 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 23 May 2023 12:55:23 +0200 Subject: [PATCH 10/64] Move the stuff related to runners to lambda_shared --- tests/ci/ci_runners_metrics_lambda/app.py | 68 ++----------------- .../ci_runners_metrics_lambda/lambda_shared | 1 + .../requirements.txt | 2 +- .../lambda_shared/__init__.py | 67 ++++++++++++++++-- tests/ci/terminate_runner_lambda/app.py | 55 +-------------- .../ci/terminate_runner_lambda/lambda_shared | 1 + .../terminate_runner_lambda/requirements.txt | 2 +- 7 files changed, 76 insertions(+), 120 deletions(-) create mode 120000 tests/ci/ci_runners_metrics_lambda/lambda_shared create mode 120000 tests/ci/terminate_runner_lambda/lambda_shared diff --git a/tests/ci/ci_runners_metrics_lambda/app.py b/tests/ci/ci_runners_metrics_lambda/app.py index 341e1b674ec..a12143752a1 100644 --- a/tests/ci/ci_runners_metrics_lambda/app.py +++ b/tests/ci/ci_runners_metrics_lambda/app.py @@ -10,7 +10,6 @@ import argparse import sys import json import time -from collections import namedtuple from datetime import datetime from typing import Dict, List, Tuple @@ -19,21 +18,14 @@ import requests # type: ignore import boto3 # type: ignore from botocore.exceptions import ClientError # type: ignore -UNIVERSAL_LABEL = "universal" -RUNNER_TYPE_LABELS = [ - "builder", - "func-tester", - "func-tester-aarch64", - "fuzzer-unit-tester", - "stress-tester", - "style-checker", - "style-checker-aarch64", -] - -RunnerDescription = namedtuple( - "RunnerDescription", ["id", "name", "tags", "offline", "busy"] +from lambda_shared import ( + RUNNER_TYPE_LABELS, + RunnerDescription, + RunnerDescriptions, + list_runners, ) -RunnerDescriptions = List[RunnerDescription] + +UNIVERSAL_LABEL = "universal" def get_dead_runners_in_ec2(runners: RunnerDescriptions) -> RunnerDescriptions: @@ -193,52 +185,6 @@ def get_access_token(jwt_token: str, installation_id: int) -> str: return data["token"] # type: ignore -def list_runners(access_token: str) -> RunnerDescriptions: - headers = { - "Authorization": f"token {access_token}", - "Accept": "application/vnd.github.v3+json", - } - per_page = 100 - response = requests.get( - f"https://api.github.com/orgs/ClickHouse/actions/runners?per_page={per_page}", - headers=headers, - ) - response.raise_for_status() - data = response.json() - total_runners = data["total_count"] - print("Expected total runners", total_runners) - runners = data["runners"] - - # round to 0 for 0, 1 for 1..100, but to 2 for 101..200 - total_pages = (total_runners - 1) // per_page + 1 - - print("Total pages", total_pages) - for i in range(2, total_pages + 1): - response = requests.get( - "https://api.github.com/orgs/ClickHouse/actions/runners" - f"?page={i}&per_page={per_page}", - headers=headers, - ) - response.raise_for_status() - data = response.json() - runners += data["runners"] - - print("Total runners", len(runners)) - result = [] - for runner in runners: - tags = [tag["name"] for tag in runner["labels"]] - desc = RunnerDescription( - id=runner["id"], - name=runner["name"], - tags=tags, - offline=runner["status"] == "offline", - busy=runner["busy"], - ) - result.append(desc) - - return result - - def group_runners_by_tag( listed_runners: RunnerDescriptions, ) -> Dict[str, RunnerDescriptions]: diff --git a/tests/ci/ci_runners_metrics_lambda/lambda_shared b/tests/ci/ci_runners_metrics_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/ci_runners_metrics_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of 
file diff --git a/tests/ci/ci_runners_metrics_lambda/requirements.txt b/tests/ci/ci_runners_metrics_lambda/requirements.txt index 98be09ab232..e99dee1743c 100644 --- a/tests/ci/ci_runners_metrics_lambda/requirements.txt +++ b/tests/ci/ci_runners_metrics_lambda/requirements.txt @@ -1,3 +1,3 @@ -requests<2.30 +../lambda_shared_package PyJWT cryptography<38 diff --git a/tests/ci/lambda_shared_package/lambda_shared/__init__.py b/tests/ci/lambda_shared_package/lambda_shared/__init__.py index c5ae4df9e17..fe52f98d5f6 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/__init__.py +++ b/tests/ci/lambda_shared_package/lambda_shared/__init__.py @@ -4,7 +4,8 @@ It exists as __init__.py and lambda_shared/__init__.py to work both in local and import json import logging import time -from typing import List, Optional +from collections import namedtuple +from typing import Any, List, Optional import boto3 # type: ignore import requests # type: ignore @@ -21,10 +22,14 @@ RUNNER_TYPE_LABELS = [ ### VENDORING -def get_parameter_from_ssm(name, decrypt=True, client=None): +def get_parameter_from_ssm( + name: str, decrypt: bool = True, client: Optional[Any] = None +) -> str: if not client: client = boto3.client("ssm", region_name="us-east-1") - return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"] + return client.get_parameter(Name=name, WithDecryption=decrypt)[ # type: ignore + "Parameter" + ]["Value"] class CHException(Exception): @@ -65,10 +70,64 @@ class ClickHouseHelper: raise CHException("Cannot fetch data from clickhouse") - def select_json_each_row(self, db: str, query: str) -> List[dict]: # type: ignore + def select_json_each_row(self, db: str, query: str) -> List[dict]: text = self._select_and_get_json_each_row(db, query) result = [] for line in text.split("\n"): if line: result.append(json.loads(line)) return result + + +### Runners + +RunnerDescription = namedtuple( + "RunnerDescription", ["id", "name", "tags", "offline", "busy"] +) +RunnerDescriptions = List[RunnerDescription] + + +def list_runners(access_token: str) -> RunnerDescriptions: + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + per_page = 100 + response = requests.get( + f"https://api.github.com/orgs/ClickHouse/actions/runners?per_page={per_page}", + headers=headers, + ) + response.raise_for_status() + data = response.json() + total_runners = data["total_count"] + print("Expected total runners", total_runners) + runners = data["runners"] + + # round to 0 for 0, 1 for 1..100, but to 2 for 101..200 + total_pages = (total_runners - 1) // per_page + 1 + + print("Total pages", total_pages) + for i in range(2, total_pages + 1): + response = requests.get( + "https://api.github.com/orgs/ClickHouse/actions/runners" + f"?page={i}&per_page={per_page}", + headers=headers, + ) + response.raise_for_status() + data = response.json() + runners += data["runners"] + + print("Total runners", len(runners)) + result = [] + for runner in runners: + tags = [tag["name"] for tag in runner["labels"]] + desc = RunnerDescription( + id=runner["id"], + name=runner["name"], + tags=tags, + offline=runner["status"] == "offline", + busy=runner["busy"], + ) + result.append(desc) + + return result diff --git a/tests/ci/terminate_runner_lambda/app.py b/tests/ci/terminate_runner_lambda/app.py index e0164bc58c0..5799a498d5a 100644 --- a/tests/ci/terminate_runner_lambda/app.py +++ b/tests/ci/terminate_runner_lambda/app.py @@ -4,7 +4,6 @@ import argparse import json import sys 
import time -from collections import namedtuple from dataclasses import dataclass from typing import Any, Dict, List, Tuple @@ -12,6 +11,8 @@ import boto3 # type: ignore import requests # type: ignore import jwt +from lambda_shared import RunnerDescriptions, list_runners + def get_key_and_app_from_aws() -> Tuple[str, int]: secret_name = "clickhouse_github_secret_key" @@ -118,58 +119,6 @@ def get_cached_instances() -> dict: return cached_instances.value -RunnerDescription = namedtuple( - "RunnerDescription", ["id", "name", "tags", "offline", "busy"] -) -RunnerDescriptions = List[RunnerDescription] - - -def list_runners(access_token: str) -> RunnerDescriptions: - headers = { - "Authorization": f"token {access_token}", - "Accept": "application/vnd.github.v3+json", - } - per_page = 100 - response = requests.get( - f"https://api.github.com/orgs/ClickHouse/actions/runners?per_page={per_page}", - headers=headers, - ) - response.raise_for_status() - data = response.json() - total_runners = data["total_count"] - print("Expected total runners", total_runners) - runners = data["runners"] - - # round to 0 for 0, 1 for 1..100, but to 2 for 101..200 - total_pages = (total_runners - 1) // per_page + 1 - - print("Total pages", total_pages) - for i in range(2, total_pages + 1): - response = requests.get( - "https://api.github.com/orgs/ClickHouse/actions/runners" - f"?page={i}&per_page={per_page}", - headers=headers, - ) - response.raise_for_status() - data = response.json() - runners += data["runners"] - - print("Total runners", len(runners)) - result = [] - for runner in runners: - tags = [tag["name"] for tag in runner["labels"]] - desc = RunnerDescription( - id=runner["id"], - name=runner["name"], - tags=tags, - offline=runner["status"] == "offline", - busy=runner["busy"], - ) - result.append(desc) - - return result - - def how_many_instances_to_kill(event_data: dict) -> Dict[str, int]: data_array = event_data["CapacityToTerminate"] to_kill_by_zone = {} # type: Dict[str, int] diff --git a/tests/ci/terminate_runner_lambda/lambda_shared b/tests/ci/terminate_runner_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/terminate_runner_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/terminate_runner_lambda/requirements.txt b/tests/ci/terminate_runner_lambda/requirements.txt index 98be09ab232..e99dee1743c 100644 --- a/tests/ci/terminate_runner_lambda/requirements.txt +++ b/tests/ci/terminate_runner_lambda/requirements.txt @@ -1,3 +1,3 @@ -requests<2.30 +../lambda_shared_package PyJWT cryptography<38 From acb9531ebf829a5c11bbeff6661e8d2122334ee6 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 23 May 2023 18:47:19 +0200 Subject: [PATCH 11/64] Unify and put GH access token to the lambda_shared --- .../cancel_and_rerun_workflow_lambda/app.py | 176 +---------------- .../lambda_shared | 1 + .../requirements.txt | 4 +- tests/ci/ci_runners_metrics_lambda/app.py | 70 ++----- .../requirements.txt | 3 +- tests/ci/lambda_shared_package/__init__.py | 0 .../lambda_shared_package/lambda_shared/pr.py | 184 ++++++++++++++++++ .../lambda_shared/token.py | 90 +++++++++ tests/ci/lambda_shared_package/pyproject.toml | 10 + tests/ci/run_check.py | 8 +- tests/ci/runner_token_rotation_lambda/app.py | 61 +----- .../lambda_shared | 1 + .../requirements.txt | 4 +- tests/ci/team_keys_lambda/app.py | 4 +- tests/ci/team_keys_lambda/lambda_shared | 1 + tests/ci/team_keys_lambda/requirements.txt | 2 +- tests/ci/terminate_runner_lambda/app.py | 92 +-------- .../terminate_runner_lambda/requirements.txt | 4 +- tests/ci/workflow_approve_rerun_lambda/app.py | 122 +----------- .../lambda_shared | 1 + .../requirements.txt | 4 +- tests/ci/workflow_jobs_lambda/lambda_shared | 1 + 22 files changed, 332 insertions(+), 511 deletions(-) create mode 120000 tests/ci/cancel_and_rerun_workflow_lambda/lambda_shared create mode 100644 tests/ci/lambda_shared_package/__init__.py create mode 100644 tests/ci/lambda_shared_package/lambda_shared/pr.py create mode 100644 tests/ci/lambda_shared_package/lambda_shared/token.py create mode 120000 tests/ci/runner_token_rotation_lambda/lambda_shared create mode 120000 tests/ci/team_keys_lambda/lambda_shared create mode 120000 tests/ci/workflow_approve_rerun_lambda/lambda_shared create mode 120000 tests/ci/workflow_jobs_lambda/lambda_shared diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 54c87fbcfa5..250655ddeb2 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -9,9 +9,10 @@ import json import re import time -import jwt import requests # type: ignore -import boto3 # type: ignore + +from lambda_shared.pr import CATEGORY_TO_LABEL, check_pr_description +from lambda_shared.token import get_cached_access_token NEED_RERUN_ON_EDITED = { @@ -27,123 +28,6 @@ MAX_RETRY = 5 DEBUG_INFO = {} # type: Dict[str, Any] -# Descriptions are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there -# updated accordingly -# The following lists are append only, try to avoid editing them -# They still could be cleaned out after the decent time though. 
-LABELS = { - "pr-backward-incompatible": ["Backward Incompatible Change"], - "pr-bugfix": [ - "Bug Fix", - "Bug Fix (user-visible misbehavior in an official stable release)", - "Bug Fix (user-visible misbehaviour in official stable or prestable release)", - "Bug Fix (user-visible misbehavior in official stable or prestable release)", - ], - "pr-build": [ - "Build/Testing/Packaging Improvement", - "Build Improvement", - "Build/Testing Improvement", - "Build", - "Packaging Improvement", - ], - "pr-documentation": [ - "Documentation (changelog entry is not required)", - "Documentation", - ], - "pr-feature": ["New Feature"], - "pr-improvement": ["Improvement"], - "pr-not-for-changelog": [ - "Not for changelog (changelog entry is not required)", - "Not for changelog", - ], - "pr-performance": ["Performance Improvement"], -} - -CATEGORY_TO_LABEL = {c: lb for lb, categories in LABELS.items() for c in categories} - - -def check_pr_description(pr_body: str) -> Tuple[str, str]: - """The function checks the body to being properly formatted according to - .github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty, - then there is an error.""" - lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else [])) - lines = [re.sub(r"\s+", " ", line) for line in lines] - - # Check if body contains "Reverts ClickHouse/ClickHouse#36337" - if [ - True - for line in lines - if re.match(r"\AReverts {GITHUB_REPOSITORY}#[\d]+\Z", line) - ]: - return "", LABELS["pr-not-for-changelog"][0] - - category = "" - entry = "" - description_error = "" - - i = 0 - while i < len(lines): - if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]): - i += 1 - if i >= len(lines): - break - # Can have one empty line between header and the category - # itself. Filter it out. - if not lines[i]: - i += 1 - if i >= len(lines): - break - category = re.sub(r"^[-*\s]*", "", lines[i]) - i += 1 - - # Should not have more than one category. Require empty line - # after the first found category. - if i >= len(lines): - break - if lines[i]: - second_category = re.sub(r"^[-*\s]*", "", lines[i]) - description_error = ( - "More than one changelog category specified: " - f"'{category}', '{second_category}'" - ) - return description_error, category - - elif re.match( - r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] - ): - i += 1 - # Can have one empty line between header and the entry itself. - # Filter it out. - if i < len(lines) and not lines[i]: - i += 1 - # All following lines until empty one are the changelog entry. - entry_lines = [] - while i < len(lines) and lines[i]: - entry_lines.append(lines[i]) - i += 1 - entry = " ".join(entry_lines) - # Don't accept changelog entries like '...'. - entry = re.sub(r"[#>*_.\- ]", "", entry) - # Don't accept changelog entries like 'Close #12345'. - entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry) - else: - i += 1 - - if not category: - description_error = "Changelog category is empty" - # Filter out the PR categories that are not for changelog. 
- elif re.match( - r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", - category, - ): - pass # to not check the rest of the conditions - elif category not in CATEGORY_TO_LABEL: - description_error, category = f"Category '{category}' is not valid", "" - elif not entry: - description_error = f"Changelog entry required for category '{category}'" - - return description_error, category - class Worker(Thread): def __init__( @@ -166,58 +50,6 @@ class Worker(Thread): self.queue.task_done() -def get_installation_id(jwt_token): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.get("https://api.github.com/app/installations", headers=headers) - response.raise_for_status() - data = response.json() - for installation in data: - if installation["account"]["login"] == "ClickHouse": - installation_id = installation["id"] - return installation_id - - -def get_access_token(jwt_token, installation_id): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.post( - f"https://api.github.com/app/installations/{installation_id}/access_tokens", - headers=headers, - ) - response.raise_for_status() - data = response.json() - return data["token"] - - -def get_key_and_app_from_aws(): - secret_name = "clickhouse_github_secret_key" - session = boto3.session.Session() - client = session.client( - service_name="secretsmanager", - ) - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - data = json.loads(get_secret_value_response["SecretString"]) - return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) - - -def get_token_from_aws(): - private_key, app_id = get_key_and_app_from_aws() - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": app_id, - } - - encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") - installation_id = get_installation_id(encoded_jwt) - return get_access_token(encoded_jwt, installation_id) - - def _exec_get_with_retry(url: str, token: str) -> dict: headers = {"Authorization": f"token {token}"} for i in range(MAX_RETRY): @@ -407,7 +239,7 @@ def exec_workflow_url(urls_to_post, token): def main(event): - token = get_token_from_aws() + token = get_cached_access_token() DEBUG_INFO["event"] = event if event["isBase64Encoded"]: event_data = json.loads(b64decode(event["body"])) diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/lambda_shared b/tests/ci/cancel_and_rerun_workflow_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/cancel_and_rerun_workflow_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt b/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt index 98be09ab232..4cb3fba0f7b 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt +++ b/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt @@ -1,3 +1 @@ -requests<2.30 -PyJWT -cryptography<38 +../lambda_shared_package[token] diff --git a/tests/ci/ci_runners_metrics_lambda/app.py b/tests/ci/ci_runners_metrics_lambda/app.py index a12143752a1..dc128dea739 100644 --- a/tests/ci/ci_runners_metrics_lambda/app.py +++ b/tests/ci/ci_runners_metrics_lambda/app.py @@ -8,12 +8,9 @@ Lambda function to: import argparse import sys -import json -import time from datetime import datetime -from typing import 
Dict, List, Tuple +from typing import Dict, List -import jwt import requests # type: ignore import boto3 # type: ignore from botocore.exceptions import ClientError # type: ignore @@ -24,6 +21,11 @@ from lambda_shared import ( RunnerDescriptions, list_runners, ) +from lambda_shared.token import ( + get_cached_access_token, + get_key_and_app_from_aws, + get_access_token_by_key_app, +) UNIVERSAL_LABEL = "universal" @@ -139,50 +141,8 @@ def get_lost_ec2_instances(runners: RunnerDescriptions) -> List[dict]: return lost_instances -def get_key_and_app_from_aws() -> Tuple[str, int]: - secret_name = "clickhouse_github_secret_key" - session = boto3.session.Session() - client = session.client( - service_name="secretsmanager", - ) - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - data = json.loads(get_secret_value_response["SecretString"]) - return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) - - def handler(event, context): - private_key, app_id = get_key_and_app_from_aws() - main(private_key, app_id, True, True) - - -def get_installation_id(jwt_token: str) -> int: - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.get("https://api.github.com/app/installations", headers=headers) - response.raise_for_status() - data = response.json() - for installation in data: - if installation["account"]["login"] == "ClickHouse": - installation_id = installation["id"] - break - - return installation_id # type: ignore - - -def get_access_token(jwt_token: str, installation_id: int) -> str: - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.post( - f"https://api.github.com/app/installations/{installation_id}/access_tokens", - headers=headers, - ) - response.raise_for_status() - data = response.json() - return data["token"] # type: ignore + main(get_cached_access_token(), True, True) def group_runners_by_tag( @@ -273,20 +233,10 @@ def delete_runner(access_token: str, runner: RunnerDescription) -> bool: def main( - github_secret_key: str, - github_app_id: int, + access_token: str, push_to_cloudwatch: bool, delete_offline_runners: bool, ) -> None: - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": github_app_id, - } - - encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") - installation_id = get_installation_id(encoded_jwt) - access_token = get_access_token(encoded_jwt, installation_id) gh_runners = list_runners(access_token) grouped_runners = group_runners_by_tag(gh_runners) for group, group_runners in grouped_runners.items(): @@ -354,4 +304,6 @@ if __name__ == "__main__": print("Attempt to get key and id from AWS secret manager") private_key, args.app_id = get_key_and_app_from_aws() - main(private_key, args.app_id, args.push_to_cloudwatch, args.delete_offline) + token = get_access_token_by_key_app(private_key, args.app_id) + + main(token, args.push_to_cloudwatch, args.delete_offline) diff --git a/tests/ci/ci_runners_metrics_lambda/requirements.txt b/tests/ci/ci_runners_metrics_lambda/requirements.txt index e99dee1743c..e2b16067a93 100644 --- a/tests/ci/ci_runners_metrics_lambda/requirements.txt +++ b/tests/ci/ci_runners_metrics_lambda/requirements.txt @@ -1,3 +1,2 @@ ../lambda_shared_package -PyJWT -cryptography<38 +../lambda_shared_package[token] diff --git a/tests/ci/lambda_shared_package/__init__.py b/tests/ci/lambda_shared_package/__init__.py new file mode 100644 
index 00000000000..e69de29bb2d diff --git a/tests/ci/lambda_shared_package/lambda_shared/pr.py b/tests/ci/lambda_shared_package/lambda_shared/pr.py new file mode 100644 index 00000000000..ef47eacc082 --- /dev/null +++ b/tests/ci/lambda_shared_package/lambda_shared/pr.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python + +import re +from typing import Tuple + +# Individual trusted contirbutors who are not in any trusted organization. +# Can be changed in runtime: we will append users that we learned to be in +# a trusted org, to save GitHub API calls. +TRUSTED_CONTRIBUTORS = { + e.lower() + for e in [ + "achimbab", + "adevyatova ", # DOCSUP + "Algunenano", # Raúl Marín, Tinybird + "amosbird", + "AnaUvarova", # DOCSUP + "anauvarova", # technical writer, Yandex + "annvsh", # technical writer, Yandex + "atereh", # DOCSUP + "azat", + "bharatnc", # Newbie, but already with many contributions. + "bobrik", # Seasoned contributor, CloudFlare + "BohuTANG", + "codyrobert", # Flickerbox engineer + "cwurm", # Employee + "damozhaeva", # DOCSUP + "den-crane", + "flickerbox-tom", # Flickerbox + "gyuton", # DOCSUP + "hagen1778", # Roman Khavronenko, seasoned contributor + "hczhcz", + "hexiaoting", # Seasoned contributor + "ildus", # adjust, ex-pgpro + "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto + "ka1bi4", # DOCSUP + "kirillikoff", # DOCSUP + "kreuzerkrieg", + "lehasm", # DOCSUP + "michon470", # DOCSUP + "nikvas0", + "nvartolomei", + "olgarev", # DOCSUP + "otrazhenia", # Yandex docs contractor + "pdv-ru", # DOCSUP + "podshumok", # cmake expert from QRator Labs + "s-mx", # Maxim Sabyanin, former employee, present contributor + "sevirov", # technical writer, Yandex + "spongedu", # Seasoned contributor + "taiyang-li", + "ucasFL", # Amos Bird's friend + "vdimir", # Employee + "vzakaznikov", + "YiuRULE", + "zlobober", # Developer of YT + "ilejn", # Arenadata, responsible for Kerberized Kafka + "thomoco", # ClickHouse + "BoloniniD", # Seasoned contributor, HSE + "tonickkozlov", # Cloudflare + "tylerhannan", # ClickHouse Employee + "myrrc", # Mike Kot, DoubleCloud + "thevar1able", # ClickHouse Employee + "aalexfvk", + "MikhailBurdukov", + "tsolodov", # ClickHouse Employee + "kitaisreal", + ] +} + +# Descriptions are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there +# updated accordingly +# The following lists are append only, try to avoid editing them +# They still could be cleaned out after the decent time though. 
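For readers following the relocation, a rough usage sketch of what this new module exposes. The PR body below is invented for illustration, and the exact heading wording is an assumption; only the patterns that check_pr_description (added below) matches against matter.

from lambda_shared.pr import CATEGORY_TO_LABEL, check_pr_description

body = (
    "### Changelog category (leave one):\n"
    "New Feature\n"
    "\n"
    "### Changelog entry (a user-readable short description of the changes):\n"
    "Handle SIGQUIT in clickhouse-client the same way as SIGINT.\n"
)
error, category = check_pr_description(body)
assert error == "" and category == "New Feature"
# The parsed category maps back to the corresponding pr-* label.
assert CATEGORY_TO_LABEL[category] == "pr-feature"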
+LABELS = { + "pr-backward-incompatible": ["Backward Incompatible Change"], + "pr-bugfix": [ + "Bug Fix", + "Bug Fix (user-visible misbehavior in an official stable release)", + "Bug Fix (user-visible misbehaviour in official stable or prestable release)", + "Bug Fix (user-visible misbehavior in official stable or prestable release)", + ], + "pr-build": [ + "Build/Testing/Packaging Improvement", + "Build Improvement", + "Build/Testing Improvement", + "Build", + "Packaging Improvement", + ], + "pr-documentation": [ + "Documentation (changelog entry is not required)", + "Documentation", + ], + "pr-feature": ["New Feature"], + "pr-improvement": ["Improvement"], + "pr-not-for-changelog": [ + "Not for changelog (changelog entry is not required)", + "Not for changelog", + ], + "pr-performance": ["Performance Improvement"], +} + +CATEGORY_TO_LABEL = {c: lb for lb, categories in LABELS.items() for c in categories} + + +def check_pr_description(pr_body: str) -> Tuple[str, str]: + """The function checks the body to being properly formatted according to + .github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty, + then there is an error.""" + lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else [])) + lines = [re.sub(r"\s+", " ", line) for line in lines] + + # Check if body contains "Reverts ClickHouse/ClickHouse#36337" + if [ + True + for line in lines + if re.match(r"\AReverts {GITHUB_REPOSITORY}#[\d]+\Z", line) + ]: + return "", LABELS["pr-not-for-changelog"][0] + + category = "" + entry = "" + description_error = "" + + i = 0 + while i < len(lines): + if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]): + i += 1 + if i >= len(lines): + break + # Can have one empty line between header and the category + # itself. Filter it out. + if not lines[i]: + i += 1 + if i >= len(lines): + break + category = re.sub(r"^[-*\s]*", "", lines[i]) + i += 1 + + # Should not have more than one category. Require empty line + # after the first found category. + if i >= len(lines): + break + if lines[i]: + second_category = re.sub(r"^[-*\s]*", "", lines[i]) + description_error = ( + "More than one changelog category specified: " + f"'{category}', '{second_category}'" + ) + return description_error, category + + elif re.match( + r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] + ): + i += 1 + # Can have one empty line between header and the entry itself. + # Filter it out. + if i < len(lines) and not lines[i]: + i += 1 + # All following lines until empty one are the changelog entry. + entry_lines = [] + while i < len(lines) and lines[i]: + entry_lines.append(lines[i]) + i += 1 + entry = " ".join(entry_lines) + # Don't accept changelog entries like '...'. + entry = re.sub(r"[#>*_.\- ]", "", entry) + # Don't accept changelog entries like 'Close #12345'. + entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry) + else: + i += 1 + + if not category: + description_error = "Changelog category is empty" + # Filter out the PR categories that are not for changelog. 
+ elif re.match( + r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", + category, + ): + pass # to not check the rest of the conditions + elif category not in CATEGORY_TO_LABEL: + description_error, category = f"Category '{category}' is not valid", "" + elif not entry: + description_error = f"Changelog entry required for category '{category}'" + + return description_error, category diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py new file mode 100644 index 00000000000..174ea4625a3 --- /dev/null +++ b/tests/ci/lambda_shared_package/lambda_shared/token.py @@ -0,0 +1,90 @@ +"""Module to get the token for GitHub""" +from dataclasses import dataclass +import json +import time +from typing import Tuple + +import boto3 # type: ignore +import jwt +import requests # type: ignore + + +def get_key_and_app_from_aws() -> Tuple[str, int]: + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name="secretsmanager", + ) + get_secret_value_response = client.get_secret_value(SecretId=secret_name) + data = json.loads(get_secret_value_response["SecretString"]) + return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) + + +def get_installation_id(jwt_token: str) -> int: + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + for installation in data: + if installation["account"]["login"] == "ClickHouse": + installation_id = installation["id"] + + return installation_id # type: ignore + + +def get_access_token_by_jwt(jwt_token: str, installation_id: int) -> str: + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post( + f"https://api.github.com/app/installations/{installation_id}/access_tokens", + headers=headers, + ) + response.raise_for_status() + data = response.json() + return data["token"] # type: ignore + + +def get_token_from_aws() -> str: + private_key, app_id = get_key_and_app_from_aws() + return get_access_token_by_key_app(private_key, app_id) + + +def get_access_token_by_key_app(private_key: str, app_id: int) -> str: + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": app_id, + } + + encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + return get_access_token_by_jwt(encoded_jwt, installation_id) + + +@dataclass +class CachedToken: + time: int + value: str + updating: bool = False + + +_cached_token = CachedToken(0, "") + + +def get_cached_access_token() -> str: + if time.time() - 550 < _cached_token.time or _cached_token.updating: + return _cached_token.value + # Indicate that the value is updating now, so the cached value can be + # used. 
The first setting and close-to-ttl are not counted as update + if _cached_token.time != 0 or time.time() - 590 < _cached_token.time: + _cached_token.updating = True + private_key, app_id = get_key_and_app_from_aws() + _cached_token.time = int(time.time()) + _cached_token.value = get_access_token_by_key_app(private_key, app_id) + _cached_token.updating = False + return _cached_token.value diff --git a/tests/ci/lambda_shared_package/pyproject.toml b/tests/ci/lambda_shared_package/pyproject.toml index 8b4b0a80948..bbf74cc0649 100644 --- a/tests/ci/lambda_shared_package/pyproject.toml +++ b/tests/ci/lambda_shared_package/pyproject.toml @@ -9,5 +9,15 @@ dependencies = [ "requests < 2.30", ] +[project.optional-dependencies] +token = [ + "PyJWT", + "cryptography<38", +] +dev = [ + "boto3", + "lambda_shared[token]", +] + [tool.distutils.bdist_wheel] universal = true diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 9849f19a1e4..330a1309016 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -20,9 +20,11 @@ from docs_check import NAME as DOCS_NAME from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo - -from cancel_and_rerun_workflow_lambda.app import CATEGORY_TO_LABEL, check_pr_description -from workflow_approve_rerun_lambda.app import TRUSTED_CONTRIBUTORS +from lambda_shared_package.lambda_shared.pr import ( + CATEGORY_TO_LABEL, + TRUSTED_CONTRIBUTORS, + check_pr_description, +) TRUSTED_ORG_IDS = { 54801242, # clickhouse diff --git a/tests/ci/runner_token_rotation_lambda/app.py b/tests/ci/runner_token_rotation_lambda/app.py index 70ee5da01f4..6544eee9581 100644 --- a/tests/ci/runner_token_rotation_lambda/app.py +++ b/tests/ci/runner_token_rotation_lambda/app.py @@ -2,40 +2,11 @@ import argparse import sys -import json -import time import boto3 # type: ignore -import jwt import requests # type: ignore - -def get_installation_id(jwt_token): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.get("https://api.github.com/app/installations", headers=headers) - response.raise_for_status() - data = response.json() - for installation in data: - if installation["account"]["login"] == "ClickHouse": - installation_id = installation["id"] - return installation_id - - -def get_access_token(jwt_token, installation_id): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.post( - f"https://api.github.com/app/installations/{installation_id}/access_tokens", - headers=headers, - ) - response.raise_for_status() - data = response.json() - return data["token"] +from lambda_shared.token import get_cached_access_token, get_access_token_by_key_app def get_runner_registration_token(access_token): @@ -52,32 +23,10 @@ def get_runner_registration_token(access_token): return data["token"] -def get_key_and_app_from_aws(): - secret_name = "clickhouse_github_secret_key" - session = boto3.session.Session() - client = session.client( - service_name="secretsmanager", - ) - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - data = json.loads(get_secret_value_response["SecretString"]) - return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) - - -def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name): - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": 
github_app_id, - } - - encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") - installation_id = get_installation_id(encoded_jwt) - access_token = get_access_token(encoded_jwt, installation_id) +def main(access_token, push_to_ssm, ssm_parameter_name): runner_registration_token = get_runner_registration_token(access_token) if push_to_ssm: - import boto3 - print("Trying to put params into ssm manager") client = boto3.client("ssm") client.put_parameter( @@ -94,8 +43,7 @@ def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name): def handler(event, context): - private_key, app_id = get_key_and_app_from_aws() - main(private_key, app_id, True, "github_runner_registration_token") + main(get_cached_access_token(), True, "github_runner_registration_token") if __name__ == "__main__": @@ -140,4 +88,5 @@ if __name__ == "__main__": with open(args.private_key_path, "r") as key_file: private_key = key_file.read() - main(private_key, args.app_id, args.push_to_ssm, args.ssm_parameter_name) + token = get_access_token_by_key_app(private_key, args.app_id) + main(token, args.push_to_ssm, args.ssm_parameter_name) diff --git a/tests/ci/runner_token_rotation_lambda/lambda_shared b/tests/ci/runner_token_rotation_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/runner_token_rotation_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/runner_token_rotation_lambda/requirements.txt b/tests/ci/runner_token_rotation_lambda/requirements.txt index 98be09ab232..4cb3fba0f7b 100644 --- a/tests/ci/runner_token_rotation_lambda/requirements.txt +++ b/tests/ci/runner_token_rotation_lambda/requirements.txt @@ -1,3 +1 @@ -requests<2.30 -PyJWT -cryptography<38 +../lambda_shared_package[token] diff --git a/tests/ci/team_keys_lambda/app.py b/tests/ci/team_keys_lambda/app.py index 870d41c441e..f562fbe101d 100644 --- a/tests/ci/team_keys_lambda/app.py +++ b/tests/ci/team_keys_lambda/app.py @@ -81,6 +81,8 @@ def get_cached_members_keys(members: set) -> Keys: def get_token_from_aws() -> str: + # We need a separate token, since the clickhouse-ci app does not have + # access to the organization members' endpoint secret_name = "clickhouse_robot_token" session = boto3.session.Session() client = session.client( @@ -130,4 +132,4 @@ if __name__ == "__main__": args = parser.parse_args() output = main(args.token, args.organization, args.team) - print(f"# Just shoing off the keys:\n{output}") + print(f"# Just showing off the keys:\n{output}") diff --git a/tests/ci/team_keys_lambda/lambda_shared b/tests/ci/team_keys_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/team_keys_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/team_keys_lambda/requirements.txt b/tests/ci/team_keys_lambda/requirements.txt index 3bcbe2dfd07..098e04a9798 100644 --- a/tests/ci/team_keys_lambda/requirements.txt +++ b/tests/ci/team_keys_lambda/requirements.txt @@ -1 +1 @@ -requests<2.30 +../lambda_shared_package diff --git a/tests/ci/terminate_runner_lambda/app.py b/tests/ci/terminate_runner_lambda/app.py index 5799a498d5a..98b14508314 100644 --- a/tests/ci/terminate_runner_lambda/app.py +++ b/tests/ci/terminate_runner_lambda/app.py @@ -5,86 +5,12 @@ import json import sys import time from dataclasses import dataclass -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, 
List import boto3 # type: ignore -import requests # type: ignore -import jwt from lambda_shared import RunnerDescriptions, list_runners - - -def get_key_and_app_from_aws() -> Tuple[str, int]: - secret_name = "clickhouse_github_secret_key" - session = boto3.session.Session() - client = session.client( - service_name="secretsmanager", - ) - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - data = json.loads(get_secret_value_response["SecretString"]) - return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) - - -def get_installation_id(jwt_token: str) -> int: - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.get("https://api.github.com/app/installations", headers=headers) - response.raise_for_status() - data = response.json() - for installation in data: - if installation["account"]["login"] == "ClickHouse": - installation_id = installation["id"] - break - - return installation_id # type: ignore - - -def get_access_token(jwt_token: str, installation_id: int) -> str: - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.post( - f"https://api.github.com/app/installations/{installation_id}/access_tokens", - headers=headers, - ) - response.raise_for_status() - data = response.json() - return data["token"] # type: ignore - - -@dataclass -class CachedToken: - time: int - value: str - updating: bool = False - - -cached_token = CachedToken(0, "") - - -def get_cached_access_token() -> str: - if time.time() - 550 < cached_token.time or cached_token.updating: - return cached_token.value - # Indicate that the value is updating now, so the cached value can be - # used. The first setting and close-to-ttl are not counted as update - if cached_token.time != 0 or time.time() - 590 < cached_token.time: - cached_token.updating = True - private_key, app_id = get_key_and_app_from_aws() - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": app_id, - } - - encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") - installation_id = get_installation_id(encoded_jwt) - cached_token.time = int(time.time()) - cached_token.value = get_access_token(encoded_jwt, installation_id) - cached_token.updating = False - return cached_token.value +from lambda_shared.token import get_access_token_by_key_app, get_cached_access_token @dataclass @@ -284,6 +210,8 @@ if __name__ == "__main__": with open(args.private_key_path, "r") as key_file: private_key = key_file.read() + token = get_access_token_by_key_app(private_key, args.app_id) + sample_event = { "AutoScalingGroupARN": "arn:aws:autoscaling:us-east-1::autoScalingGroup:d4738357-2d40-4038-ae7e-b00ae0227003:autoScalingGroupName/my-asg", "AutoScalingGroupName": "my-asg", @@ -328,14 +256,4 @@ if __name__ == "__main__": "Cause": "SCALE_IN", } - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": args.app_id, - } - - encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") - installation_id = get_installation_id(encoded_jwt) - access_token = get_access_token(encoded_jwt, args.app_id) - - main(access_token, sample_event) + main(token, sample_event) diff --git a/tests/ci/terminate_runner_lambda/requirements.txt b/tests/ci/terminate_runner_lambda/requirements.txt index e99dee1743c..4cb3fba0f7b 100644 --- a/tests/ci/terminate_runner_lambda/requirements.txt +++ b/tests/ci/terminate_runner_lambda/requirements.txt 
@@ -1,3 +1 @@ -../lambda_shared_package -PyJWT -cryptography<38 +../lambda_shared_package[token] diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 32cba5d466b..3db62430d85 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -5,9 +5,10 @@ import fnmatch import json import time -import jwt import requests # type: ignore -import boto3 # type: ignore + +from lambda_shared.pr import TRUSTED_CONTRIBUTORS +from lambda_shared.token import get_cached_access_token SUSPICIOUS_CHANGED_FILES_NUMBER = 200 @@ -67,108 +68,6 @@ NEED_RERUN_WORKFLOWS = { "ReleaseBranchCI", } -# Individual trusted contirbutors who are not in any trusted organization. -# Can be changed in runtime: we will append users that we learned to be in -# a trusted org, to save GitHub API calls. -TRUSTED_CONTRIBUTORS = { - e.lower() - for e in [ - "achimbab", - "adevyatova ", # DOCSUP - "Algunenano", # Raúl Marín, Tinybird - "amosbird", - "AnaUvarova", # DOCSUP - "anauvarova", # technical writer, Yandex - "annvsh", # technical writer, Yandex - "atereh", # DOCSUP - "azat", - "bharatnc", # Newbie, but already with many contributions. - "bobrik", # Seasoned contributor, CloudFlare - "BohuTANG", - "codyrobert", # Flickerbox engineer - "cwurm", # Employee - "damozhaeva", # DOCSUP - "den-crane", - "flickerbox-tom", # Flickerbox - "gyuton", # DOCSUP - "hagen1778", # Roman Khavronenko, seasoned contributor - "hczhcz", - "hexiaoting", # Seasoned contributor - "ildus", # adjust, ex-pgpro - "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto - "ka1bi4", # DOCSUP - "kirillikoff", # DOCSUP - "kreuzerkrieg", - "lehasm", # DOCSUP - "michon470", # DOCSUP - "nikvas0", - "nvartolomei", - "olgarev", # DOCSUP - "otrazhenia", # Yandex docs contractor - "pdv-ru", # DOCSUP - "podshumok", # cmake expert from QRator Labs - "s-mx", # Maxim Sabyanin, former employee, present contributor - "sevirov", # technical writer, Yandex - "spongedu", # Seasoned contributor - "taiyang-li", - "ucasFL", # Amos Bird's friend - "vdimir", # Employee - "vzakaznikov", - "YiuRULE", - "zlobober", # Developer of YT - "ilejn", # Arenadata, responsible for Kerberized Kafka - "thomoco", # ClickHouse - "BoloniniD", # Seasoned contributor, HSE - "tonickkozlov", # Cloudflare - "tylerhannan", # ClickHouse Employee - "myrrc", # Mike Kot, DoubleCloud - "thevar1able", # ClickHouse Employee - "aalexfvk", - "MikhailBurdukov", - "tsolodov", # ClickHouse Employee - "kitaisreal", - ] -} - - -def get_installation_id(jwt_token): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.get("https://api.github.com/app/installations", headers=headers) - response.raise_for_status() - data = response.json() - for installation in data: - if installation["account"]["login"] == "ClickHouse": - installation_id = installation["id"] - return installation_id - - -def get_access_token(jwt_token, installation_id): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.post( - f"https://api.github.com/app/installations/{installation_id}/access_tokens", - headers=headers, - ) - response.raise_for_status() - data = response.json() - return data["token"] - - -def get_key_and_app_from_aws(): - secret_name = "clickhouse_github_secret_key" - session = boto3.session.Session() - client = session.client( - service_name="secretsmanager", - ) - 
get_secret_value_response = client.get_secret_value(SecretId=secret_name) - data = json.loads(get_secret_value_response["SecretString"]) - return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) - def is_trusted_contributor(pr_user_login, pr_user_orgs): if pr_user_login.lower() in TRUSTED_CONTRIBUTORS: @@ -331,19 +230,6 @@ def label_manual_approve(pull_request, token): _exec_post_with_retry(url, token, data) -def get_token_from_aws(): - private_key, app_id = get_key_and_app_from_aws() - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": app_id, - } - - encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") - installation_id = get_installation_id(encoded_jwt) - return get_access_token(encoded_jwt, installation_id) - - def get_workflow_jobs(workflow_description, token): jobs_url = ( workflow_description.api_url + f"/attempts/{workflow_description.attempt}/jobs" @@ -443,7 +329,7 @@ def check_workflow_completed( def main(event): - token = get_token_from_aws() + token = get_cached_access_token() event_data = json.loads(event["body"]) print("The body received:", event["body"]) workflow_description = get_workflow_description_from_event(event_data) diff --git a/tests/ci/workflow_approve_rerun_lambda/lambda_shared b/tests/ci/workflow_approve_rerun_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/workflow_approve_rerun_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/workflow_approve_rerun_lambda/requirements.txt b/tests/ci/workflow_approve_rerun_lambda/requirements.txt index 98be09ab232..4cb3fba0f7b 100644 --- a/tests/ci/workflow_approve_rerun_lambda/requirements.txt +++ b/tests/ci/workflow_approve_rerun_lambda/requirements.txt @@ -1,3 +1 @@ -requests<2.30 -PyJWT -cryptography<38 +../lambda_shared_package[token] diff --git a/tests/ci/workflow_jobs_lambda/lambda_shared b/tests/ci/workflow_jobs_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/workflow_jobs_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file From 2dca0eac1b5024e97f8e36889d8d39f43e8e4c2b Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 23 May 2023 21:51:35 +0200 Subject: [PATCH 12/64] Delete __init__.py in lambda directories to break subpackage --- tests/ci/cancel_and_rerun_workflow_lambda/__init__.py | 0 tests/ci/runner_token_rotation_lambda/__init__.py | 0 tests/ci/team_keys_lambda/__init__.py | 0 tests/ci/terminate_runner_lambda/__init__.py | 0 tests/ci/workflow_approve_rerun_lambda/__init__.py | 1 - 5 files changed, 1 deletion(-) delete mode 100644 tests/ci/cancel_and_rerun_workflow_lambda/__init__.py delete mode 100644 tests/ci/runner_token_rotation_lambda/__init__.py delete mode 100644 tests/ci/team_keys_lambda/__init__.py delete mode 100644 tests/ci/terminate_runner_lambda/__init__.py delete mode 100644 tests/ci/workflow_approve_rerun_lambda/__init__.py diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/__init__.py b/tests/ci/cancel_and_rerun_workflow_lambda/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/ci/runner_token_rotation_lambda/__init__.py b/tests/ci/runner_token_rotation_lambda/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/ci/team_keys_lambda/__init__.py b/tests/ci/team_keys_lambda/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/ci/terminate_runner_lambda/__init__.py b/tests/ci/terminate_runner_lambda/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/ci/workflow_approve_rerun_lambda/__init__.py b/tests/ci/workflow_approve_rerun_lambda/__init__.py deleted file mode 100644 index 4265cc3e6c1..00000000000 --- a/tests/ci/workflow_approve_rerun_lambda/__init__.py +++ /dev/null @@ -1 +0,0 @@ -#!/usr/bin/env python From e8b03d74986a4e0f51f9cd064493cd5419c78add Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 26 May 2023 17:17:49 +0200 Subject: [PATCH 13/64] Move insert part for ClickHouseHelper to shared --- .../lambda_shared/__init__.py | 91 +++++++++- tests/ci/workflow_jobs_lambda/app.py | 165 +++--------------- .../ci/workflow_jobs_lambda/requirements.txt | 2 +- 3 files changed, 110 insertions(+), 148 deletions(-) diff --git a/tests/ci/lambda_shared_package/lambda_shared/__init__.py b/tests/ci/lambda_shared_package/lambda_shared/__init__.py index fe52f98d5f6..534d7773ddd 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/__init__.py +++ b/tests/ci/lambda_shared_package/lambda_shared/__init__.py @@ -5,7 +5,7 @@ import json import logging import time from collections import namedtuple -from typing import Any, List, Optional +from typing import Any, Dict, Iterable, List, Optional import boto3 # type: ignore import requests # type: ignore @@ -36,10 +36,14 @@ class CHException(Exception): pass +class InsertException(CHException): + pass + + class ClickHouseHelper: def __init__( self, - url: Optional[str] = None, + url: str, user: Optional[str] = None, password: Optional[str] = None, ): @@ -50,6 +54,89 @@ class ClickHouseHelper: if password: self.auth["X-ClickHouse-Key"] = password + @staticmethod + def _insert_json_str_info_impl( + url: str, auth: Dict[str, str], db: str, table: str, json_str: str + ) -> None: + params = { + "database": db, + "query": f"INSERT INTO {table} FORMAT JSONEachRow", + "date_time_input_format": "best_effort", + "send_logs_level": "warning", + } + + for i in range(5): + try: + response = requests.post( + url, params=params, data=json_str, headers=auth + ) + except Exception as e: + error = f"Received exception while sending data to {url} on {i} attempt: {e}" + logging.warning(error) + continue + + logging.info("Response content '%s'", response.content) + + if response.ok: + break + + error = ( + "Cannot insert data into clickhouse at try " + + str(i) + + ": HTTP code " + + str(response.status_code) + + ": '" + + str(response.text) + + "'" + ) + + if response.status_code >= 500: + # A retriable error + time.sleep(1) + continue + + logging.info( + "Request headers '%s', body '%s'", + response.request.headers, + response.request.body, + ) + + raise InsertException(error) + else: + raise InsertException(error) + + def _insert_json_str_info(self, db: str, table: str, json_str: str) -> None: + self._insert_json_str_info_impl(self.url, self.auth, db, table, json_str) + + def insert_event_into( + self, db: str, table: str, event: object, safe: bool = True + ) -> None: + event_str = json.dumps(event) + try: + self._insert_json_str_info(db, table, event_str) + except InsertException as e: + logging.error( + "Exception happened during inserting data into clickhouse: %s", e + ) + if not safe: + raise + + def insert_events_into( + self, db: str, table: str, events: Iterable[object], safe: bool = True + ) -> None: + jsons = [] + for event in events: + jsons.append(json.dumps(event)) + + try: + self._insert_json_str_info(db, table, ",".join(jsons)) + except InsertException as e: + logging.error( + "Exception happened during inserting data into clickhouse: %s", e + ) + if not safe: + raise + def _select_and_get_json_each_row(self, db: str, query: str) -> str: params = { "database": db, diff --git a/tests/ci/workflow_jobs_lambda/app.py b/tests/ci/workflow_jobs_lambda/app.py index c4ce68c3f8e..c624a492604 100644 --- a/tests/ci/workflow_jobs_lambda/app.py +++ b/tests/ci/workflow_jobs_lambda/app.py @@ -10,13 +10,11 @@ fields for private repositories from base64 
import b64decode from dataclasses import dataclass -from typing import Any, List +from typing import Any, List, Optional import json import logging -import time -import boto3 # type: ignore -import requests # type: ignore +from lambda_shared import ClickHouseHelper, InsertException, get_parameter_from_ssm logging.getLogger().setLevel(logging.INFO) @@ -66,137 +64,7 @@ class WorkflowJob: return self.__dict__ -### VENDORING -def get_parameter_from_ssm(name, decrypt=True, client=None): - if not client: - client = boto3.client("ssm", region_name="us-east-1") - return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"] - - -class InsertException(Exception): - pass - - -class ClickHouseHelper: - def __init__(self, url=None): - if url is None: - url = get_parameter_from_ssm("clickhouse-test-stat-url") - - self.url = url - self.auth = { - "X-ClickHouse-User": get_parameter_from_ssm("clickhouse-test-stat-login"), - "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password"), - } - - @staticmethod - def _insert_json_str_info_impl(url, auth, db, table, json_str): - params = { - "database": db, - "query": f"INSERT INTO {table} FORMAT JSONEachRow", - "date_time_input_format": "best_effort", - "send_logs_level": "warning", - } - - for i in range(5): - try: - response = requests.post( - url, params=params, data=json_str, headers=auth - ) - except Exception as e: - error = f"Received exception while sending data to {url} on {i} attempt: {e}" - logging.warning(error) - continue - - logging.info("Response content '%s'", response.content) - - if response.ok: - break - - error = ( - "Cannot insert data into clickhouse at try " - + str(i) - + ": HTTP code " - + str(response.status_code) - + ": '" - + str(response.text) - + "'" - ) - - if response.status_code >= 500: - # A retriable error - time.sleep(1) - continue - - logging.info( - "Request headers '%s', body '%s'", - response.request.headers, - response.request.body, - ) - - raise InsertException(error) - else: - raise InsertException(error) - - def _insert_json_str_info(self, db, table, json_str): - self._insert_json_str_info_impl(self.url, self.auth, db, table, json_str) - - def insert_event_into(self, db, table, event, safe=True): - event_str = json.dumps(event) - try: - self._insert_json_str_info(db, table, event_str) - except InsertException as e: - logging.error( - "Exception happened during inserting data into clickhouse: %s", e - ) - if not safe: - raise - - def insert_events_into(self, db, table, events, safe=True): - jsons = [] - for event in events: - jsons.append(json.dumps(event)) - - try: - self._insert_json_str_info(db, table, ",".join(jsons)) - except InsertException as e: - logging.error( - "Exception happened during inserting data into clickhouse: %s", e - ) - if not safe: - raise - - def _select_and_get_json_each_row(self, db, query): - params = { - "database": db, - "query": query, - "default_format": "JSONEachRow", - } - for i in range(5): - response = None - try: - response = requests.get(self.url, params=params, headers=self.auth) - response.raise_for_status() - return response.text - except Exception as ex: - logging.warning("Cannot insert with exception %s", str(ex)) - if response: - logging.warning("Reponse text %s", response.text) - time.sleep(0.1 * i) - - raise Exception("Cannot fetch data from clickhouse") - - def select_json_each_row(self, db, query): - text = self._select_and_get_json_each_row(db, query) - result = [] - for line in text.split("\n"): - if line: - 
result.append(json.loads(line)) - return result - - -### VENDORING END - -clickhouse_client = ClickHouseHelper() +CH_CLIENT = None # type: Optional[ClickHouseHelper] def send_event_workflow_job(workflow_job: WorkflowJob) -> None: @@ -232,23 +100,30 @@ def send_event_workflow_job(workflow_job: WorkflowJob) -> None: # PARTITION BY toStartOfMonth(started_at) # ORDER BY (id, updated_at) # SETTINGS index_granularity = 8192 - global clickhouse_client - kwargs = { - "db": "default", - "table": "workflow_jobs", - "event": workflow_job.as_dict(), - "safe": False, - } + global CH_CLIENT + CH_CLIENT = CH_CLIENT or ClickHouseHelper( + get_parameter_from_ssm("clickhouse-test-stat-url"), + get_parameter_from_ssm("clickhouse-test-stat-login"), + get_parameter_from_ssm("clickhouse-test-stat-password"), + ) try: - clickhouse_client.insert_event_into(**kwargs) + CH_CLIENT.insert_event_into( + "default", "workflow_jobs", workflow_job.as_dict(), False + ) except InsertException as ex: logging.exception( "Got an exception on insert, tryuing to update the client " "credentials and repeat", exc_info=ex, ) - clickhouse_client = ClickHouseHelper() - clickhouse_client.insert_event_into(**kwargs) + CH_CLIENT = ClickHouseHelper( + get_parameter_from_ssm("clickhouse-test-stat-url"), + get_parameter_from_ssm("clickhouse-test-stat-login"), + get_parameter_from_ssm("clickhouse-test-stat-password"), + ) + CH_CLIENT.insert_event_into( + "default", "workflow_jobs", workflow_job.as_dict(), False + ) def handler(event: dict, context: Any) -> dict: diff --git a/tests/ci/workflow_jobs_lambda/requirements.txt b/tests/ci/workflow_jobs_lambda/requirements.txt index 3bcbe2dfd07..098e04a9798 100644 --- a/tests/ci/workflow_jobs_lambda/requirements.txt +++ b/tests/ci/workflow_jobs_lambda/requirements.txt @@ -1 +1 @@ -requests<2.30 +../lambda_shared_package From 0ddd53088d48134cdbd55bcda2e6bc3bfad423de Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 31 May 2023 23:17:41 +0200 Subject: [PATCH 14/64] Add a new runner type for ci metrics and autoscaling --- tests/ci/autoscale_runners_lambda/app.py | 4 ++++ tests/ci/lambda_shared_package/lambda_shared/__init__.py | 1 + 2 files changed, 5 insertions(+) diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py index ab09afb3aa8..825708cabbc 100644 --- a/tests/ci/autoscale_runners_lambda/app.py +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -65,6 +65,10 @@ def get_scales(runner_type: str) -> Tuple[int, int]: # 10. I am trying 7 now. # UPDATE THE COMMENT ON CHANGES scale_up = 7 + elif runner_type == "limited-tester": + # The limited runners should inflate and deflate faster + scale_down = 1 + scale_up = 2 return scale_down, scale_up diff --git a/tests/ci/lambda_shared_package/lambda_shared/__init__.py b/tests/ci/lambda_shared_package/lambda_shared/__init__.py index 534d7773ddd..c56994cc86a 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/__init__.py +++ b/tests/ci/lambda_shared_package/lambda_shared/__init__.py @@ -15,6 +15,7 @@ RUNNER_TYPE_LABELS = [ "func-tester", "func-tester-aarch64", "fuzzer-unit-tester", + "limited-tester", "stress-tester", "style-checker", "style-checker-aarch64", From b7c5fdab77c41361af7b5130256a2afdf2bc1488 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 31 May 2023 23:56:39 +0200 Subject: [PATCH 15/64] Move lambda package building to public.ecr.aws/lambda/python for compatibility --- tests/ci/lambda_shared_package/pyproject.toml | 5 +++-- tests/ci/team_keys_lambda/build_and_deploy_archive.sh | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/ci/lambda_shared_package/pyproject.toml b/tests/ci/lambda_shared_package/pyproject.toml index bbf74cc0649..dff36b89fbb 100644 --- a/tests/ci/lambda_shared_package/pyproject.toml +++ b/tests/ci/lambda_shared_package/pyproject.toml @@ -6,13 +6,14 @@ build-backend = "setuptools.build_meta" name = "lambda_shared" version = "0.0.1" dependencies = [ - "requests < 2.30", + "requests", + "urllib3 < 2" ] [project.optional-dependencies] token = [ "PyJWT", - "cryptography<38", + "cryptography", ] dev = [ "boto3", diff --git a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh index 89a2d514965..02d5638cf18 100644 --- a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh +++ b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh @@ -12,7 +12,7 @@ DRY_RUN=${DRY_RUN:-} PY_VERSION=${PY_VERSION:-3.10} PY_EXEC="python${PY_VERSION}" # Image to build the lambda zip package -DOCKER_IMAGE="python:${PY_VERSION}-slim" +DOCKER_IMAGE="public.ecr.aws/lambda/python:${PY_VERSION}" # Rename the_lambda_name directory to the-lambda-name lambda in AWS LAMBDA_NAME=${DIR_NAME//_/-} # The name of directory with lambda code @@ -23,9 +23,9 @@ cp app.py "$PACKAGE" if [ -f requirements.txt ]; then VENV=lambda-venv rm -rf "$VENV" lambda-package.zip - docker run --rm --user="${UID}" -e HOME=/tmp \ + docker run --rm --user="${UID}" -e HOME=/tmp --entrypoint=/bin/bash \ --volume="${WORKDIR}/..:/ci" --workdir="/ci/${DIR_NAME}" "${DOCKER_IMAGE}" \ - /bin/bash -exc " + -exc " '$PY_EXEC' -m venv '$VENV' && source '$VENV/bin/activate' && pip install -r requirements.txt From e657e2ba10c425caa550de6e9b3814dca8fc3f32 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 1 Jun 2023 17:12:19 +0200 Subject: [PATCH 16/64] Additional logging in autoscale_runners_lambda --- tests/ci/autoscale_runners_lambda/app.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py index 825708cabbc..bebfb594b59 100644 --- a/tests/ci/autoscale_runners_lambda/app.py +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -117,7 +117,17 @@ def set_capacity( # Finally, should the capacity be even changed stop = stop or asg["DesiredCapacity"] == desired_capacity if stop: + logging.info( + "Do not increase ASG %s capacity, current capacity=%s, " + "maximum capacity=%s, running jobs=%s, queue size=%s", + asg["AutoScalingGroupName"], + desired_capacity, + asg["MaxSize"], + running, + queued, + ) return + logging.info( "The ASG %s capacity will be increased to %s, current capacity=%s, " "maximum capacity=%s, running jobs=%s, queue size=%s", @@ -142,6 +152,15 @@ def set_capacity( desired_capacity = min(desired_capacity, asg["MaxSize"]) stop = stop or asg["DesiredCapacity"] == desired_capacity if stop: + logging.info( + "Do not decrease ASG %s capacity, current capacity=%s, " + "minimum capacity=%s, running jobs=%s, queue size=%s", + asg["AutoScalingGroupName"], + desired_capacity, + asg["MinSize"], + running, + queued, + ) return logging.info( From b775b5cfd6eb924bc119ea80b08fde9a4bb42a1b Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 1 Jun 2023 17:44:57 +0200 Subject: [PATCH 17/64] Move all CI runners metrics into one namespace --- tests/ci/ci_runners_metrics_lambda/app.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/ci/ci_runners_metrics_lambda/app.py b/tests/ci/ci_runners_metrics_lambda/app.py index dc128dea739..d8b9489b1a7 100644 --- a/tests/ci/ci_runners_metrics_lambda/app.py +++ b/tests/ci/ci_runners_metrics_lambda/app.py @@ -171,18 +171,21 @@ def group_runners_by_tag( def push_metrics_to_cloudwatch( - listed_runners: RunnerDescriptions, namespace: str + listed_runners: RunnerDescriptions, group_name: str ) -> None: client = boto3.client("cloudwatch") + namespace = "RunnersMetrics" metrics_data = [] busy_runners = sum( 1 for runner in listed_runners if runner.busy and not runner.offline ) + dimensions = [{"Name": "group", "Value": group_name}] metrics_data.append( { "MetricName": "BusyRunners", "Value": busy_runners, "Unit": "Count", + "Dimensions": dimensions, } ) total_active_runners = sum(1 for runner in listed_runners if not runner.offline) @@ -191,6 +194,7 @@ def push_metrics_to_cloudwatch( "MetricName": "ActiveRunners", "Value": total_active_runners, "Unit": "Count", + "Dimensions": dimensions, } ) total_runners = len(listed_runners) @@ -199,6 +203,7 @@ def push_metrics_to_cloudwatch( "MetricName": "TotalRunners", "Value": total_runners, "Unit": "Count", + "Dimensions": dimensions, } ) if total_active_runners == 0: @@ -211,6 +216,7 @@ def push_metrics_to_cloudwatch( "MetricName": "BusyRunnersRatio", "Value": busy_ratio, "Unit": "Percent", + "Dimensions": dimensions, } ) @@ -242,7 +248,7 @@ def main( for group, group_runners in grouped_runners.items(): if push_to_cloudwatch: print(f"Pushing metrics for group '{group}'") - push_metrics_to_cloudwatch(group_runners, "RunnersMetrics/" + group) + push_metrics_to_cloudwatch(group_runners, group) else: print(group, f"({len(group_runners)})") for runner in group_runners: From 3c13eaa1592b5cf46d6aca8b3764aefc546eeff7 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 1 Jun 2023 22:03:42 +0200 Subject: [PATCH 18/64] Simplify get_dead_runners_in_ec2 a little bit, update runners --- tests/ci/ci_runners_metrics_lambda/app.py | 62 +++++++++++------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/tests/ci/ci_runners_metrics_lambda/app.py b/tests/ci/ci_runners_metrics_lambda/app.py index d8b9489b1a7..9c38659269b 100644 --- a/tests/ci/ci_runners_metrics_lambda/app.py +++ b/tests/ci/ci_runners_metrics_lambda/app.py @@ -99,44 +99,44 @@ def get_dead_runners_in_ec2(runners: RunnerDescriptions) -> RunnerDescriptions: def get_lost_ec2_instances(runners: RunnerDescriptions) -> List[dict]: client = boto3.client("ec2") reservations = client.describe_instances( - Filters=[{"Name": "tag-key", "Values": ["github:runner-type"]}] + Filters=[{"Name": "tag-key", "Values": ["github:runner-type"]}], )["Reservations"] - lost_instances = [] - offline_runners = [ - runner.name for runner in runners if runner.offline and not runner.busy + # flatten the reservation into instances + instances = [ + instance + for reservation in reservations + for instance in reservation["Instances"] ] - # Here we refresh the runners to get the most recent state + lost_instances = [] + offline_runner_names = { + runner.name for runner in runners if runner.offline and not runner.busy + } + runner_names = {runner.name for runner in runners} now = datetime.now().timestamp() - for reservation in reservations: - for instance in reservation["Instances"]: - # Do not consider instances started 20 minutes ago as problematic - if now - instance["LaunchTime"].timestamp() < 1200: - continue + for instance in instances: + # Do not consider instances started 20 minutes ago as problematic + if now - instance["LaunchTime"].timestamp() < 1200: + continue - runner_type = [ - tag["Value"] - for tag in instance["Tags"] - if tag["Key"] == "github:runner-type" - ][0] - # If there's no necessary labels in runner type it's fine - if not ( - UNIVERSAL_LABEL in runner_type or runner_type in RUNNER_TYPE_LABELS - ): - continue + runner_type = [ + tag["Value"] + for tag in instance["Tags"] + if tag["Key"] == "github:runner-type" + ][0] + # If there's no necessary labels in runner type it's fine + if not (UNIVERSAL_LABEL in runner_type or runner_type in RUNNER_TYPE_LABELS): + continue - if instance["InstanceId"] in offline_runners: - lost_instances.append(instance) - continue + if instance["InstanceId"] in offline_runner_names: + lost_instances.append(instance) + continue - if instance["State"]["Name"] == "running" and ( - not [ - runner - for runner in runners - if runner.name == instance["InstanceId"] - ] - ): - lost_instances.append(instance) + if ( + instance["State"]["Name"] == "running" + and not instance["InstanceId"] in runner_names + ): + lost_instances.append(instance) return lost_instances From 9bd0a53e7c3ce6b1ad86bc09b134319a8563ff47 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 1 Jun 2023 22:28:02 +0200 Subject: [PATCH 19/64] Get only online instances in get_lost_ec2_instances --- tests/ci/ci_runners_metrics_lambda/app.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci_runners_metrics_lambda/app.py b/tests/ci/ci_runners_metrics_lambda/app.py index 9c38659269b..71a644fe072 100644 --- a/tests/ci/ci_runners_metrics_lambda/app.py +++ b/tests/ci/ci_runners_metrics_lambda/app.py @@ -99,7 +99,10 @@ def get_dead_runners_in_ec2(runners: RunnerDescriptions) -> RunnerDescriptions: def get_lost_ec2_instances(runners: RunnerDescriptions) -> List[dict]: client = boto3.client("ec2") reservations = client.describe_instances( - Filters=[{"Name": "tag-key", "Values": ["github:runner-type"]}], + Filters=[ + {"Name": "tag-key", "Values": ["github:runner-type"]}, + {"Name": "instance-state-name", "Values": ["pending", "running"]}, + ], )["Reservations"] # flatten the reservation into instances instances = [ From 5e17adc9c0cab1ac911ccf6c7ad3cb1f8d8c7447 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 2 Jun 2023 14:08:14 +0000 Subject: [PATCH 20/64] Add `system.user_processes` table --- .../System/StorageSystemUserProcesses.cpp | 60 +++++++++++++++++++ .../System/StorageSystemUserProcesses.h | 29 +++++++++ src/Storages/System/attachSystemTables.cpp | 2 + 3 files changed, 91 insertions(+) create mode 100644 src/Storages/System/StorageSystemUserProcesses.cpp create mode 100644 src/Storages/System/StorageSystemUserProcesses.h diff --git a/src/Storages/System/StorageSystemUserProcesses.cpp b/src/Storages/System/StorageSystemUserProcesses.cpp new file mode 100644 index 00000000000..5973f9e2af3 --- /dev/null +++ b/src/Storages/System/StorageSystemUserProcesses.cpp @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +NamesAndTypesList StorageSystemUserProcesses::getNamesAndTypes() +{ + return { + {"user", std::make_shared()}, + {"memory_usage", std::make_shared()}, + {"peak_memory_usage", std::make_shared()}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, + }; +} + +NamesAndAliases StorageSystemUserProcesses::getNamesAndAliases() +{ + return { + {"ProfileEvents.Names", {std::make_shared(std::make_shared())}, "mapKeys(ProfileEvents)"}, + {"ProfileEvents.Values", {std::make_shared(std::make_shared())}, "mapValues(ProfileEvents)"}}; +} + +void StorageSystemUserProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + const auto user_info = context->getProcessList().getUserInfo(true); + + for (const auto & [user, info] : user_info) + { + size_t i = 0; + + res_columns[i++]->insert(user); + res_columns[i++]->insert(info.memory_usage); + res_columns[i++]->insert(info.peak_memory_usage); + { + IColumn * column = res_columns[i++].get(); + + if (info.profile_counters) + ProfileEvents::dumpToMapColumn(*info.profile_counters, column, true); + else + { + column->insertDefault(); + } + } + } +} +} diff --git a/src/Storages/System/StorageSystemUserProcesses.h b/src/Storages/System/StorageSystemUserProcesses.h new file mode 100644 index 00000000000..9bdc009d849 --- /dev/null +++ b/src/Storages/System/StorageSystemUserProcesses.h @@ -0,0 +1,29 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; + + +/** Implements `processes` system table, which allows you to get information about the queries that 
are currently executing. + */ +class StorageSystemUserProcesses final : public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemUserProcesses"; } + + static NamesAndTypesList getNamesAndTypes(); + + static NamesAndAliases getNamesAndAliases(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 424c74662ec..7d21d9e39d2 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -185,6 +186,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "remote_data_paths"); attach(context, system_database, "certificates"); attach(context, system_database, "named_collections"); + attach(context, system_database, "user_processes"); if (has_zookeeper) { From 17cca6ed756eaaa58eae7ef6aa89e43dcda8ce24 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 2 Jun 2023 10:08:48 -0400 Subject: [PATCH 21/64] add direct join docs --- .../integrations/embedded-rocksdb.md | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md index a3604b3c332..dab741a9f63 100644 --- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md @@ -120,3 +120,88 @@ Values can be updated using the `ALTER TABLE` query. The primary key cannot be u ```sql ALTER TABLE test UPDATE v1 = v1 * 10 + 2 WHERE key LIKE 'some%' AND v3 > 3.1; ``` + +### Joins + +A special `direct` join with EmbeddedRocksDB tables is supported. +This direct join avoids forming a hash table in memory and accesses +the data directly from the EmbeddedRocksDB. + +To enable direct joins: +```sql +SET join_algorithm = 'direct' +``` + +:::tip +When the `join_algorithm` is set to `direct`, direct joins will be used +when possible. However, direct joins are not used for RIGHT or FULL JOINs. +ClickHouse will choose another join algorithm when direct joins are not possible. 
+::: + +#### Example + +##### Create and populate an EmbeddedRocksDB table: +```sql +CREATE TABLE rdb +( + `key` UInt32, + `value` Array(UInt32), + `value2` String +) +ENGINE = EmbeddedRocksDB +PRIMARY KEY key +``` + +```sql +INSERT INTO rdb + SELECT + toUInt32(sipHash64(number) % 10) as key, + [key, key+1] as value, + ('val2' || toString(key)) as value2 + FROM numbers_mt(10); +``` + +##### Create and populate a table to join with table `rdb`: + +```sql +CREATE TABLE t2 +( + `k` UInt16 +) +ENGINE = TinyLog +``` + +```sql +INSERT INTO t2 SELECT number AS k +FROM numbers_mt(10) +``` + +##### Set the join algorithm to `direct`: + +```sql +SET join_algorithm = 'direct' +``` + +##### An INNER JOIN: +```sql +SELECT * +FROM +( + SELECT k AS key + FROM t2 +) AS t2 +INNER JOIN rdb ON rdb.key = t2.key +ORDER BY key ASC +``` +```response +┌─key─┬─rdb.key─┬─value──┬─value2─┐ +│ 0 │ 0 │ [0,1] │ val20 │ +│ 2 │ 2 │ [2,3] │ val22 │ +│ 3 │ 3 │ [3,4] │ val23 │ +│ 6 │ 6 │ [6,7] │ val26 │ +│ 7 │ 7 │ [7,8] │ val27 │ +│ 8 │ 8 │ [8,9] │ val28 │ +│ 9 │ 9 │ [9,10] │ val29 │ +└─────┴─────────┴────────┴────────┘ +``` + From a0901b1d1cb938759e6bcca37d0b03df0c1929e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 2 Jun 2023 15:33:38 +0000 Subject: [PATCH 22/64] Add tests --- .../02771_system_user_processes.reference | 6 ++++++ .../0_stateless/02771_system_user_processes.sh | 15 +++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 tests/queries/0_stateless/02771_system_user_processes.reference create mode 100755 tests/queries/0_stateless/02771_system_user_processes.sh diff --git a/tests/queries/0_stateless/02771_system_user_processes.reference b/tests/queries/0_stateless/02771_system_user_processes.reference new file mode 100644 index 00000000000..ab0ff41ddc5 --- /dev/null +++ b/tests/queries/0_stateless/02771_system_user_processes.reference @@ -0,0 +1,6 @@ +0 +0 +default +test_user_02771 +default true true +test_user_02771 2 2 diff --git a/tests/queries/0_stateless/02771_system_user_processes.sh b/tests/queries/0_stateless/02771_system_user_processes.sh new file mode 100755 index 00000000000..e8bf88a9fb2 --- /dev/null +++ b/tests/queries/0_stateless/02771_system_user_processes.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS test_user_02771" +$CLICKHOUSE_CLIENT -q "CREATE USER test_user_02771" +$CLICKHOUSE_CLIENT -u test_user_02771 -q "SELECT * FROM system.numbers LIMIT 1" +$CLICKHOUSE_CLIENT -u test_user_02771 -q "SELECT * FROM system.numbers LIMIT 1" +$CLICKHOUSE_CLIENT -q "SELECT user FROM system.user_processes" +$CLICKHOUSE_CLIENT -q "SELECT user, toBool(ProfileEvents['SelectQuery'] > 0), toBool(ProfileEvents['Query'] > 0) FROM system.user_processes WHERE user='default'" +$CLICKHOUSE_CLIENT -q "SELECT user, ProfileEvents['SelectQuery'], ProfileEvents['Query'] FROM system.user_processes WHERE user='test_user_02771'" +$CLICKHOUSE_CLIENT -q "DROP USER test_user_02771" + From da4d55cdaf4e25e16ddbf9028e6c8f5d336c60f6 Mon Sep 17 00:00:00 2001 From: Valentin Alexeev Date: Fri, 2 Jun 2023 14:02:26 +0200 Subject: [PATCH 23/64] Additional error information when JSON is too large If a parser fails on a large JSON, then output the last position processed to allow review. 
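For context, here is a minimal Python sketch (an illustrative re-implementation, not the actual C++ in src/Formats/JSONUtils.cpp) of the size guard this patch extends: the segmentation code tracks how many bytes of the stream it has already consumed, and the error for an oversized JSON object now reports that position so the offending row can be located in the input.

```python
# Hedged sketch: `bytes_consumed` stands in for ReadBuffer::count() in the C++ code,
# and the threshold mirrors the 10 * min_chunk_bytes_for_parallel_parsing check.
def check_object_size(bytes_consumed: int, current_object_size: int, min_bytes: int) -> None:
    """Raise when a single JSON object is suspiciously large, reporting the stream position."""
    if min_bytes != 0 and current_object_size > 10 * min_bytes:
        raise ValueError(
            f"Size of JSON object at position {bytes_consumed} is extremely large. "
            f"Expected not greater than {min_bytes} bytes, but current is "
            f"{current_object_size} bytes per row. Increase the value setting "
            "'min_chunk_bytes_for_parallel_parsing' or check your data manually, "
            "most likely JSON is malformed"
        )

# A 120 MiB object against a 10 MiB minimum chunk size triggers the error and
# points at byte offset 7340032, where the malformed row starts (values are illustrative).
try:
    check_object_size(7_340_032, 120 * 1024 * 1024, 10 * 1024 * 1024)
except ValueError as err:
    print(err)
```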
--- src/Formats/JSONUtils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 284cffdb9d7..7b7c073b6b2 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -45,9 +45,9 @@ namespace JSONUtils const auto current_object_size = memory.size() + static_cast(pos - in.position()); if (min_bytes != 0 && current_object_size > 10 * min_bytes) throw ParsingException(ErrorCodes::INCORRECT_DATA, - "Size of JSON object is extremely large. Expected not greater than {} bytes, but current is {} bytes per row. " + "Size of JSON object at position {} is extremely large. Expected not greater than {} bytes, but current is {} bytes per row. " "Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, " - "most likely JSON is malformed", min_bytes, current_object_size); + "most likely JSON is malformed", pos, min_bytes, current_object_size); if (quotes) { From 516cda94eeb6c822b12697fd32921cc79ea97c15 Mon Sep 17 00:00:00 2001 From: Valentin Alexeev Date: Fri, 2 Jun 2023 17:14:21 +0200 Subject: [PATCH 24/64] Use in.count() instead of pos --- src/Formats/JSONUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 7b7c073b6b2..0aac72c68fe 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -47,7 +47,7 @@ namespace JSONUtils throw ParsingException(ErrorCodes::INCORRECT_DATA, "Size of JSON object at position {} is extremely large. Expected not greater than {} bytes, but current is {} bytes per row. " "Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, " - "most likely JSON is malformed", pos, min_bytes, current_object_size); + "most likely JSON is malformed", in.count(), min_bytes, current_object_size); if (quotes) { From d28b4181e94c5602b5512af8ed541dcc2a1a55f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 2 Jun 2023 16:57:36 +0000 Subject: [PATCH 25/64] Add `SHOW USER PROCESSES` query --- src/Interpreters/InterpreterFactory.cpp | 6 ++++ .../InterpreterShowUserProcessesQuery.cpp | 18 +++++++++++ .../InterpreterShowUserProcessesQuery.h | 30 +++++++++++++++++ src/Parsers/ASTShowUserProcessesQuery.h | 17 ++++++++++ src/Parsers/ParserQueryWithOutput.cpp | 5 ++- src/Parsers/ParserShowUserProcessesQuery.h | 32 +++++++++++++++++++ 6 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 src/Interpreters/InterpreterShowUserProcessesQuery.cpp create mode 100644 src/Interpreters/InterpreterShowUserProcessesQuery.h create mode 100644 src/Parsers/ASTShowUserProcessesQuery.h create mode 100644 src/Parsers/ParserShowUserProcessesQuery.h diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 9cd1f2a251c..c31e3801478 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -81,6 +82,7 @@ #include #include #include +#include #include #include #include @@ -266,6 +268,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); diff --git a/src/Interpreters/InterpreterShowUserProcessesQuery.cpp 
b/src/Interpreters/InterpreterShowUserProcessesQuery.cpp new file mode 100644 index 00000000000..51287a7ad5b --- /dev/null +++ b/src/Interpreters/InterpreterShowUserProcessesQuery.cpp @@ -0,0 +1,18 @@ +#include + +#include +#include +#include + +#include + + +namespace DB +{ + +BlockIO InterpreterShowUserProcessesQuery::execute() +{ + return executeQuery("SELECT * FROM system.user_processes ORDER BY user DESC", getContext(), true); +} + +} diff --git a/src/Interpreters/InterpreterShowUserProcessesQuery.h b/src/Interpreters/InterpreterShowUserProcessesQuery.h new file mode 100644 index 00000000000..a1c385dc82f --- /dev/null +++ b/src/Interpreters/InterpreterShowUserProcessesQuery.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/** Return list of currently executing queries. +TODO(antaljanosbenjamin) + */ +class InterpreterShowUserProcessesQuery : public IInterpreter, WithMutableContext +{ +public: + InterpreterShowUserProcessesQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) + : WithMutableContext(context_), query_ptr(query_ptr_) {} + + BlockIO execute() override; + + /// We ignore the quota and limits here because execute() will rewrite a show query as a SELECT query and then + /// the SELECT query will checks the quota and limits. + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } + +private: + ASTPtr query_ptr; +}; + +} diff --git a/src/Parsers/ASTShowUserProcessesQuery.h b/src/Parsers/ASTShowUserProcessesQuery.h new file mode 100644 index 00000000000..cd522c152b6 --- /dev/null +++ b/src/Parsers/ASTShowUserProcessesQuery.h @@ -0,0 +1,17 @@ +#pragma once + +#include + + +namespace DB +{ + +struct ASTShowUserProcessesIDAndQueryNames +{ + static constexpr auto ID = "ShowUserProcesses"; + static constexpr auto Query = "SHOW USER PROCESSES"; +}; + +using ASTShowUserProcessesQuery = ASTQueryWithOutputImpl; + +} diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 6796f4528c4..d5293e5f709 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -61,6 +62,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserShowGrantsQuery show_grants_p; ParserShowPrivilegesQuery show_privileges_p; ParserExplainQuery explain_p(end, allow_settings_after_format_in_insert); + ParserShowUserProcessesQuery show_user_processes_p; ASTPtr query; @@ -88,7 +90,8 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec || show_access_p.parse(pos, query, expected) || show_access_entities_p.parse(pos, query, expected) || show_grants_p.parse(pos, query, expected) - || show_privileges_p.parse(pos, query, expected); + || show_privileges_p.parse(pos, query, expected) + || show_user_processes_p.parse(pos, query, expected); if (!parsed) return false; diff --git a/src/Parsers/ParserShowUserProcessesQuery.h b/src/Parsers/ParserShowUserProcessesQuery.h new file mode 100644 index 00000000000..be484e74d5d --- /dev/null +++ b/src/Parsers/ParserShowUserProcessesQuery.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +/** Query SHOW USER PROCESSES + */ +class ParserShowUserProcessesQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SHOW USER PROCESSES query"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) 
override + { + auto query = std::make_shared(); + + if (!ParserKeyword("SHOW USER PROCESSES").ignore(pos, expected)) + return false; + + node = query; + + return true; + } +}; + +} From 96fe4b5107611a627b7981fdac2afe9304660e48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 2 Jun 2023 16:57:46 +0000 Subject: [PATCH 26/64] Add tests --- .../02771_system_user_processes.reference | 5 ++--- .../02771_system_user_processes.sh | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/02771_system_user_processes.reference b/tests/queries/0_stateless/02771_system_user_processes.reference index ab0ff41ddc5..8c8ca8abb52 100644 --- a/tests/queries/0_stateless/02771_system_user_processes.reference +++ b/tests/queries/0_stateless/02771_system_user_processes.reference @@ -1,6 +1,5 @@ +SHOW USER PROCESSES query succeeded! 0 0 -default -test_user_02771 default true true -test_user_02771 2 2 +2 2 diff --git a/tests/queries/0_stateless/02771_system_user_processes.sh b/tests/queries/0_stateless/02771_system_user_processes.sh index e8bf88a9fb2..910af4be9e2 100755 --- a/tests/queries/0_stateless/02771_system_user_processes.sh +++ b/tests/queries/0_stateless/02771_system_user_processes.sh @@ -4,12 +4,15 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS test_user_02771" -$CLICKHOUSE_CLIENT -q "CREATE USER test_user_02771" -$CLICKHOUSE_CLIENT -u test_user_02771 -q "SELECT * FROM system.numbers LIMIT 1" -$CLICKHOUSE_CLIENT -u test_user_02771 -q "SELECT * FROM system.numbers LIMIT 1" -$CLICKHOUSE_CLIENT -q "SELECT user FROM system.user_processes" -$CLICKHOUSE_CLIENT -q "SELECT user, toBool(ProfileEvents['SelectQuery'] > 0), toBool(ProfileEvents['Query'] > 0) FROM system.user_processes WHERE user='default'" -$CLICKHOUSE_CLIENT -q "SELECT user, ProfileEvents['SelectQuery'], ProfileEvents['Query'] FROM system.user_processes WHERE user='test_user_02771'" -$CLICKHOUSE_CLIENT -q "DROP USER test_user_02771" +USER_POSTFIX=`random_str 10` +USER="test_user_02771_$USER_POSTFIX" + +$CLICKHOUSE_CLIENT -q "SHOW USER PROCESSES" &>"${CLICKHOUSE_TMP}/test_output" && echo "SHOW USER PROCESSES query succeeded!" 
|| cat "${CLICKHOUSE_TMP}/test_output" +$CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS $USER" +$CLICKHOUSE_CLIENT -q "CREATE USER $USER" +$CLICKHOUSE_CLIENT -u "$USER" -q "SELECT * FROM system.numbers LIMIT 1" +$CLICKHOUSE_CLIENT -u "$USER" -q "SELECT * FROM system.numbers LIMIT 1" +$CLICKHOUSE_CLIENT -q "SELECT user, toBool(ProfileEvents['SelectQuery'] > 0), toBool(ProfileEvents['Query'] > 0) FROM system.user_processes WHERE user='default'" +$CLICKHOUSE_CLIENT -q "SELECT ProfileEvents['SelectQuery'], ProfileEvents['Query'] FROM system.user_processes WHERE user='$USER'" +$CLICKHOUSE_CLIENT -q "DROP USER $USER" From 50654435dc1cf6ac826d08d28adf2e669250d5ec Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 2 Jun 2023 19:36:37 +0000 Subject: [PATCH 27/64] Implement endianness-independent serialization for UUID --- .../Serializations/SerializationUUID.cpp | 16 ++--- src/IO/ReadHelpers.cpp | 72 +++++++++---------- src/IO/ReadHelpers.h | 15 ++-- src/IO/WriteHelpers.cpp | 38 +++++++--- src/IO/WriteHelpers.h | 7 +- .../Formats/Impl/AvroRowInputFormat.cpp | 3 +- .../Formats/Impl/AvroRowOutputFormat.cpp | 5 +- 7 files changed, 75 insertions(+), 81 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index ee1327ef094..13313111b2b 100644 --- a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -51,19 +51,11 @@ void SerializationUUID::deserializeTextQuoted(IColumn & column, ReadBuffer & ist { assertChar('\'', istr); char * next_pos = find_first_symbols<'\\', '\''>(istr.position(), istr.buffer().end()); - size_t len = next_pos - istr.position(); - if ((len == 32) && (istr.position()[32] == '\'')) + const auto len = next_pos - istr.position(); + if ((len == 32 || len == 36) && istr.position()[len] == '\'') { - parseUUIDWithoutSeparator( - reinterpret_cast(istr.position()), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); - istr.ignore(33); - fast = true; - } - else if ((len == 36) && (istr.position()[36] == '\'')) - { - parseUUID( - reinterpret_cast(istr.position()), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); - istr.ignore(37); + uuid = parseUUID(std::span(reinterpret_cast(istr.position()), len)); + istr.ignore(len + 1); fast = true; } else diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 99d25ee6613..a85a057f2b3 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -46,48 +46,40 @@ inline void parseHex(IteratorSrc src, IteratorDst dst) dst[dst_pos] = unhex2(reinterpret_cast(&src[src_pos])); } -void parseUUID(const UInt8 * src36, UInt8 * dst16) +UUID parseUUID(std::span src) { - /// If string is not like UUID - implementation specific behaviour. + UUID uuid; + const auto * src_ptr = src.data(); + auto * dst = reinterpret_cast(&uuid); + if (const auto size = src.size(); size == 36) + { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + parseHex<4>(src_ptr, dst); + parseHex<2>(src_ptr + 9, dst + 4); + parseHex<2>(src_ptr + 14, dst + 6); + parseHex<2>(src_ptr + 19, dst + 8); + parseHex<6>(src_ptr + 24, dst + 10); +#else + const std::reverse_iterator dst_it(dst + sizeof(UUID)); + /// FIXME This code looks like trash. 
+ parseHex<4>(src_ptr, dst + 8); + parseHex<2>(src_ptr + 9, dst + 12); + parseHex<2>(src_ptr + 14, dst + 14); + parseHex<2>(src_ptr + 19, dst); + parseHex<6>(src_ptr + 24, dst + 2); +#endif + } + else if (size == 32) + { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + parseHex<16>(src_ptr, dst); +#else + parseHex<8>(src_ptr, dst + 8); + parseHex<8>(src_ptr + 16, dst); +#endif + } - parseHex<4>(&src36[0], &dst16[0]); - parseHex<2>(&src36[9], &dst16[4]); - parseHex<2>(&src36[14], &dst16[6]); - parseHex<2>(&src36[19], &dst16[8]); - parseHex<6>(&src36[24], &dst16[10]); -} - -void parseUUIDWithoutSeparator(const UInt8 * src36, UInt8 * dst16) -{ - /// If string is not like UUID - implementation specific behaviour. - - parseHex<16>(&src36[0], &dst16[0]); -} - -/** Function used when byte ordering is important when parsing uuid - * ex: When we create an UUID type - */ -void parseUUID(const UInt8 * src36, std::reverse_iterator dst16) -{ - /// If string is not like UUID - implementation specific behaviour. - - /// FIXME This code looks like trash. - parseHex<4>(&src36[0], dst16 + 8); - parseHex<2>(&src36[9], dst16 + 12); - parseHex<2>(&src36[14], dst16 + 14); - parseHex<2>(&src36[19], dst16); - parseHex<6>(&src36[24], dst16 + 2); -} - -/** Function used when byte ordering is important when parsing uuid - * ex: When we create an UUID type - */ -void parseUUIDWithoutSeparator(const UInt8 * src36, std::reverse_iterator dst16) -{ - /// If string is not like UUID - implementation specific behaviour. - - parseHex<8>(&src36[0], dst16 + 8); - parseHex<8>(&src36[16], dst16); + return uuid; } void NO_INLINE throwAtAssertionFailed(const char * s, ReadBuffer & buf) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 32338552b66..7e293944d19 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -623,12 +624,6 @@ struct NullOutput void push_back(char) {} /// NOLINT }; -void parseUUID(const UInt8 * src36, UInt8 * dst16); -void parseUUIDWithoutSeparator(const UInt8 * src36, UInt8 * dst16); -void parseUUID(const UInt8 * src36, std::reverse_iterator dst16); -void parseUUIDWithoutSeparator(const UInt8 * src36, std::reverse_iterator dst16); - - template ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf); @@ -770,6 +765,9 @@ inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf) return readDateTextImpl(date, buf); } +/// If string is not like UUID - implementation specific behaviour. 
+UUID parseUUID(std::span src); + template inline ReturnType readUUIDTextImpl(UUID & uuid, ReadBuffer & buf) { @@ -797,12 +795,9 @@ inline ReturnType readUUIDTextImpl(UUID & uuid, ReadBuffer & buf) return ReturnType(false); } } - - parseUUID(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); } - else - parseUUIDWithoutSeparator(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); + uuid = parseUUID({reinterpret_cast(s), size}); return ReturnType(true); } else diff --git a/src/IO/WriteHelpers.cpp b/src/IO/WriteHelpers.cpp index a0eceddc6f6..6023d4c9d5b 100644 --- a/src/IO/WriteHelpers.cpp +++ b/src/IO/WriteHelpers.cpp @@ -23,17 +23,35 @@ void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes) /** Function used when byte ordering is important when parsing uuid * ex: When we create an UUID type */ -void formatUUID(std::reverse_iterator src16, UInt8 * dst36) +std::array formatUUID(const UUID & uuid) { - formatHex(src16 + 8, &dst36[0], 4); - dst36[8] = '-'; - formatHex(src16 + 12, &dst36[9], 2); - dst36[13] = '-'; - formatHex(src16 + 14, &dst36[14], 2); - dst36[18] = '-'; - formatHex(src16, &dst36[19], 2); - dst36[23] = '-'; - formatHex(src16 + 2, &dst36[24], 6); + std::array dst; + const auto * src_ptr = reinterpret_cast(&uuid); + auto * dst_ptr = dst.data(); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + formatHex(src_ptr, dst_ptr, 4); + dst[8] = '-'; + formatHex(src_ptr + 4, dst_ptr + 9, 2); + dst[13] = '-'; + formatHex(src_ptr + 6, dst_ptr + 14, 2); + dst[18] = '-'; + formatHex(src_ptr + 8, dst_ptr + 19, 2); + dst[23] = '-'; + formatHex(src_ptr + 10, dst_ptr + 24, 6); +#else + const std::reverse_iterator src_it(src_ptr + 16); + formatHex(src_it + 8, dst_ptr, 4); + dst[8] = '-'; + formatHex(src_it + 12, dst_ptr + 9, 2); + dst[13] = '-'; + formatHex(src_it + 14, dst_ptr + 14, 2); + dst[18] = '-'; + formatHex(src_it, dst_ptr + 19, 2); + dst[23] = '-'; + formatHex(src_it + 2, dst_ptr + 24, 6); +#endif + + return dst; } void writeIPv4Text(const IPv4 & ip, WriteBuffer & buf) diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index cdbc952690c..923684c4249 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -625,13 +625,12 @@ inline void writeXMLStringForTextElement(std::string_view s, WriteBuffer & buf) writeXMLStringForTextElement(s.data(), s.data() + s.size(), buf); } -void formatUUID(std::reverse_iterator src16, UInt8 * dst36); +std::array formatUUID(const UUID & uuid); inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf) { - char s[36]; - formatUUID(std::reverse_iterator(reinterpret_cast(&uuid) + 16), reinterpret_cast(s)); - buf.write(s, sizeof(s)); + const auto text = formatUUID(uuid); + buf.write(text.data(), text.size()); } void writeIPv4Text(const IPv4 & ip, WriteBuffer & buf); diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index c2602a4d1d5..974b198a483 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -256,8 +256,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro if (tmp.length() != 36) throw ParsingException(ErrorCodes::CANNOT_PARSE_UUID, "Cannot parse uuid {}", tmp); - UUID uuid; - parseUUID(reinterpret_cast(tmp.data()), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); + const auto uuid = parseUUID({reinterpret_cast(tmp.data()), tmp.length()}); assert_cast(column).insertValue(uuid); return true; }; diff --git 
a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index c743b2c1766..2b163164d56 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -329,9 +329,8 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder) { const auto & uuid = assert_cast(column).getElement(row_num); - std::array s; - formatUUID(std::reverse_iterator(reinterpret_cast(&uuid) + 16), s.data()); - encoder.encodeBytes(reinterpret_cast(s.data()), s.size()); + const auto text = formatUUID(uuid); + encoder.encodeBytes(reinterpret_cast(text.data()), text.size()); }}; } case TypeIndex::Array: From 87eaaa0f7bf43a7145c24e726af8b3b912f38eea Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 2 Jun 2023 16:30:18 -0400 Subject: [PATCH 28/64] address review comments --- .../table-engines/integrations/embedded-rocksdb.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md index dab741a9f63..6664b6a4613 100644 --- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md @@ -127,15 +127,17 @@ A special `direct` join with EmbeddedRocksDB tables is supported. This direct join avoids forming a hash table in memory and accesses the data directly from the EmbeddedRocksDB. +With large joins you may see much lower memory usage with direct joins +because the hash table is not created. + To enable direct joins: ```sql -SET join_algorithm = 'direct' +SET join_algorithm = 'direct, hash' ``` :::tip -When the `join_algorithm` is set to `direct`, direct joins will be used -when possible. However, direct joins are not used for RIGHT or FULL JOINs. -ClickHouse will choose another join algorithm when direct joins are not possible. +When the `join_algorithm` is set to `direct, hash`, direct joins will be used +when possible, and hash otherwise. 
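+For example, a RIGHT or FULL JOIN cannot be executed with the direct algorithm, so such a query falls back to the hash join.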
::: #### Example @@ -205,3 +207,6 @@ ORDER BY key ASC └─────┴─────────┴────────┴────────┘ ``` +### More information on Joins +- [`join_algorithm` setting](/docs/en/operations/settings/settings.md#settings-join_algorithm) +- [JOIN clause](/docs/en/sql-reference/statements/select/join.md) From 4506299d73a3fbf8fc9446b3eed05fe4d5553c23 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 30 May 2023 20:53:45 +0200 Subject: [PATCH 29/64] impl --- docker/test/performance-comparison/report.py | 4 +++- tests/ci/performance_comparison_check.py | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 214f2d550b4..a1f2eb9d9ec 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -626,7 +626,9 @@ if args.report == "main": message_array.append(str(faster_queries) + " faster") if slower_queries: - if slower_queries > 3: + # This threshold should be synchronized with the value in https://github.com/ClickHouse/ClickHouse/blob/master/tests/ci/performance_comparison_check.py#L225 + # False positives rate should be < 1%: https://shorturl.at/CDEK8 + if slower_queries > 5: status = "failure" message_array.append(str(slower_queries) + " slower") diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index bf5704f31bd..1baf547816f 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -219,6 +219,12 @@ if __name__ == "__main__": except Exception: traceback.print_exc() + def too_many_slow(msg): + match = re.search("(|.* )(\d+) slower.*", msg) + # This threshold should be synchronized with the value in https://github.com/ClickHouse/ClickHouse/blob/master/docker/test/performance-comparison/report.py#L629 + threshold = 5 + return int(match.group(2).strip()) > threshold if match else False + # Try to fetch status from the report. 
status = "" message = "" @@ -236,7 +242,7 @@ if __name__ == "__main__": # TODO: Remove me, always green mode for the first time, unless errors status = "success" - if "errors" in message.lower(): + if "errors" in message.lower() or too_many_slow(message.lower()): status = "failure" # TODO: Remove until here except Exception: From e548dce123debf4864348d606629f90844b5e5f8 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sat, 3 Jun 2023 00:08:47 +0200 Subject: [PATCH 30/64] fix --- tests/ci/performance_comparison_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 1baf547816f..41ace95c350 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -220,7 +220,7 @@ if __name__ == "__main__": traceback.print_exc() def too_many_slow(msg): - match = re.search("(|.* )(\d+) slower.*", msg) + match = re.search(r"(|.* )(\d+) slower.*", msg) # This threshold should be synchronized with the value in https://github.com/ClickHouse/ClickHouse/blob/master/docker/test/performance-comparison/report.py#L629 threshold = 5 return int(match.group(2).strip()) > threshold if match else False From 7a7e03a2ffbd879afd5971de6de13c7919a89157 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 3 Jun 2023 14:16:59 +0300 Subject: [PATCH 31/64] Function if constant folding --- src/Functions/if.cpp | 25 +++++++++++++++++++ .../25337_if_constant_folding.reference | 5 ++++ .../0_stateless/25337_if_constant_folding.sql | 1 + 3 files changed, 31 insertions(+) create mode 100644 tests/queries/0_stateless/25337_if_constant_folding.reference create mode 100644 tests/queries/0_stateless/25337_if_constant_folding.sql diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 93bdf406f9d..d00e83c4eb7 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -1116,6 +1116,31 @@ public: return res; } + + ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override + { + const ColumnWithTypeAndName & arg_cond = arguments[0]; + if (!arg_cond.column || !isColumnConst(*arg_cond.column)) { + return {}; + } + + const ColumnConst * cond_const_col = checkAndGetColumnConst>(arg_cond.column.get()); + bool condition_value = cond_const_col->getValue(); + + const ColumnWithTypeAndName & arg_then = arguments[1]; + const ColumnWithTypeAndName & arg_else = arguments[2]; + const ColumnWithTypeAndName & potential_const_column = condition_value ? 
arg_then : arg_else; + + if (!potential_const_column.column || !isColumnConst(*potential_const_column.column)) + return {}; + + auto result = castColumn(potential_const_column, result_type); + if (!isColumnConst(*result)) { + return {}; + } + + return result; + } }; } diff --git a/tests/queries/0_stateless/25337_if_constant_folding.reference b/tests/queries/0_stateless/25337_if_constant_folding.reference new file mode 100644 index 00000000000..9dfcf39f5a7 --- /dev/null +++ b/tests/queries/0_stateless/25337_if_constant_folding.reference @@ -0,0 +1,5 @@ +0 +1 +2 +3 +4 diff --git a/tests/queries/0_stateless/25337_if_constant_folding.sql b/tests/queries/0_stateless/25337_if_constant_folding.sql new file mode 100644 index 00000000000..1610465021b --- /dev/null +++ b/tests/queries/0_stateless/25337_if_constant_folding.sql @@ -0,0 +1 @@ +SELECT cast(number, if(1 = 1, 'UInt64', toString(number))) FROM numbers(5); From 13a122697139f80f34ca006f691fd1f4f20e8528 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 25 May 2023 16:09:05 +0200 Subject: [PATCH 32/64] Fix nested distributed SELECT in WITH clause For the CSE (common scalar expression, form of CTE) form of WITH it will set max_result_rows to 1, since there should not be more rows, but this will be applied for the DESC TABLE as well (service distributed query that required to obtain the structure). Note, that it is a problem only for nested distributed queries because getSubqueryContext() from InterpreterSelectQuery.cpp resets those limits as well, but this does not helps, for the nested DESC since it is executed on the remote node that has max_result_rows=1. Signed-off-by: Azat Khuzhin --- src/Storages/getStructureOfRemoteTable.cpp | 9 +++++++++ .../0_stateless/02768_cse_nested_distributed.reference | 3 +++ .../queries/0_stateless/02768_cse_nested_distributed.sql | 5 +++++ 3 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/02768_cse_nested_distributed.reference create mode 100644 tests/queries/0_stateless/02768_cse_nested_distributed.sql diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index b2737249166..e5fc01be9f4 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -60,6 +60,15 @@ ColumnsDescription getStructureOfRemoteTableInShard( ColumnsDescription res; auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), table_id); + /// Ignore limit for result number of rows (that could be set during handling CSE/CTE), + /// since this is a service query and should not lead to query failure. + { + Settings new_settings = new_context->getSettings(); + new_settings.max_result_rows = 0; + new_settings.max_result_bytes = 0; + new_context->setSettings(new_settings); + } + /// Expect only needed columns from the result of DESC TABLE. NOTE 'comment' column is ignored for compatibility reasons. 
Block sample_block { diff --git a/tests/queries/0_stateless/02768_cse_nested_distributed.reference b/tests/queries/0_stateless/02768_cse_nested_distributed.reference new file mode 100644 index 00000000000..e8183f05f5d --- /dev/null +++ b/tests/queries/0_stateless/02768_cse_nested_distributed.reference @@ -0,0 +1,3 @@ +1 +1 +1 diff --git a/tests/queries/0_stateless/02768_cse_nested_distributed.sql b/tests/queries/0_stateless/02768_cse_nested_distributed.sql new file mode 100644 index 00000000000..90e526c0d01 --- /dev/null +++ b/tests/queries/0_stateless/02768_cse_nested_distributed.sql @@ -0,0 +1,5 @@ +with (select count() > 0 from remote('127.2', system.settings)) as s select s; +-- nested +with (select count() > 0 from remote('127.2', remote('127.2', system.settings))) as s select s; +-- nested via view() +with (select count() > 0 from remote('127.2', view(select count() from remote('127.2', system.settings)))) as s select s; From e28dfb7ea851844c943a4dbab33dcf6d2f468f4e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 3 Jun 2023 21:44:31 +0300 Subject: [PATCH 33/64] Updated tests --- tests/queries/0_stateless/00835_if_generic_case.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00835_if_generic_case.reference b/tests/queries/0_stateless/00835_if_generic_case.reference index 84c036b17ca..8f9c3f6ef13 100644 --- a/tests/queries/0_stateless/00835_if_generic_case.reference +++ b/tests/queries/0_stateless/00835_if_generic_case.reference @@ -3,7 +3,7 @@ 2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 -2000-01-01 00:00:00 2000-01-02 2000-01-02 +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 2000-01-01 00:00:00 2000-01-02 2000-01-02 2000-01-01 00:00:00 2000-01-02 2000-01-02 2000-01-01 00:00:00 2000-01-02 2000-01-01 00:00:00 From db806bd394c7b7dfe42f225f3c1ad7b1be1f2ea9 Mon Sep 17 00:00:00 2001 From: auxten Date: Sun, 4 Jun 2023 17:44:29 +0800 Subject: [PATCH 34/64] Resize underlying vector only pos_offset == vector.size() --- src/IO/WriteBufferFromVector.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromVector.h b/src/IO/WriteBufferFromVector.h index 4b2a3581625..c793a34b406 100644 --- a/src/IO/WriteBufferFromVector.h +++ b/src/IO/WriteBufferFromVector.h @@ -86,7 +86,10 @@ private: size_t old_size = vector.size(); /// pos may not be equal to vector.data() + old_size, because WriteBuffer::next() can be used to flush data size_t pos_offset = pos - reinterpret_cast(vector.data()); - vector.resize(old_size * size_multiplier); + if (pos_offset == vector.size()) + { + vector.resize(old_size * size_multiplier); + } internal_buffer = Buffer(reinterpret_cast(vector.data() + pos_offset), reinterpret_cast(vector.data() + vector.size())); working_buffer = internal_buffer; } From 46cbdeeb7e7975b25de35fb75912da3a7ece21ec Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sun, 4 Jun 2023 15:02:46 +0300 Subject: [PATCH 35/64] Fixed tests --- src/Functions/if.cpp | 6 ++---- ...olding.reference => 02771_if_constant_folding.reference} | 0 ...f_constant_folding.sql => 02771_if_constant_folding.sql} | 0 3 files changed, 2 insertions(+), 4 deletions(-) rename tests/queries/0_stateless/{25337_if_constant_folding.reference => 02771_if_constant_folding.reference} (100%) rename tests/queries/0_stateless/{25337_if_constant_folding.sql => 02771_if_constant_folding.sql} (100%) diff --git a/src/Functions/if.cpp 
b/src/Functions/if.cpp index d00e83c4eb7..8d43b3a4ca3 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -1120,9 +1120,8 @@ public: ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override { const ColumnWithTypeAndName & arg_cond = arguments[0]; - if (!arg_cond.column || !isColumnConst(*arg_cond.column)) { + if (!arg_cond.column || !isColumnConst(*arg_cond.column)) return {}; - } const ColumnConst * cond_const_col = checkAndGetColumnConst>(arg_cond.column.get()); bool condition_value = cond_const_col->getValue(); @@ -1135,9 +1134,8 @@ public: return {}; auto result = castColumn(potential_const_column, result_type); - if (!isColumnConst(*result)) { + if (!isColumnConst(*result)) return {}; - } return result; } diff --git a/tests/queries/0_stateless/25337_if_constant_folding.reference b/tests/queries/0_stateless/02771_if_constant_folding.reference similarity index 100% rename from tests/queries/0_stateless/25337_if_constant_folding.reference rename to tests/queries/0_stateless/02771_if_constant_folding.reference diff --git a/tests/queries/0_stateless/25337_if_constant_folding.sql b/tests/queries/0_stateless/02771_if_constant_folding.sql similarity index 100% rename from tests/queries/0_stateless/25337_if_constant_folding.sql rename to tests/queries/0_stateless/02771_if_constant_folding.sql From 0f4dd26cebbcf9201124287e8d31acda92a9e9f7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 4 Jun 2023 16:03:44 +0200 Subject: [PATCH 36/64] Add async iteration to object storage --- src/Common/CurrentMetrics.cpp | 4 + .../AzureBlobStorage/AzureObjectStorage.cpp | 69 ++++++++++++++++++ .../AzureBlobStorage/AzureObjectStorage.h | 2 + src/Disks/ObjectStorages/IObjectStorage.cpp | 9 +++ src/Disks/ObjectStorages/IObjectStorage.h | 7 ++ .../ObjectStorages/ObjectStorageIterator.cpp | 20 +++++ .../ObjectStorages/ObjectStorageIterator.h | 53 ++++++++++++++ .../ObjectStorageIteratorAsync.cpp | 64 ++++++++++++++++ .../ObjectStorageIteratorAsync.h | 58 +++++++++++++++ .../ObjectStorages/S3/S3ObjectStorage.cpp | 73 +++++++++++++++++++ src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 2 + 11 files changed, 361 insertions(+) create mode 100644 src/Disks/ObjectStorages/ObjectStorageIterator.cpp create mode 100644 src/Disks/ObjectStorages/ObjectStorageIterator.h create mode 100644 src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp create mode 100644 src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 82d68ca8185..4c858ee788d 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -131,6 +131,10 @@ M(DistributedInsertThreadsActive, "Number of threads used for INSERT into Distributed running a task.") \ M(StorageS3Threads, "Number of threads in the StorageS3 thread pool.") \ M(StorageS3ThreadsActive, "Number of threads in the StorageS3 thread pool running a task.") \ + M(ObjectStorageS3Threads, "Number of threads in the S3ObjectStorage thread pool.") \ + M(ObjectStorageS3ThreadsActive, "Number of threads in the S3ObjectStorage thread pool running a task.") \ + M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \ + M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \ M(MergeTreePartsLoaderThreads, "Number of threads in the MergeTree parts loader thread pool.") \ M(MergeTreePartsLoaderThreadsActive, "Number of threads in 
the MergeTree parts loader thread pool running a task.") \ M(MergeTreePartsCleanerThreads, "Number of threads in the MergeTree parts cleaner thread pool.") \ diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 0358b4e915a..23a0da39dd3 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -11,9 +11,16 @@ #include #include +#include #include #include +namespace CurrentMetrics +{ + extern const Metric ObjectStorageAzureThreads; + extern const Metric ObjectStorageAzureThreadsActive; + +} namespace DB { @@ -26,6 +33,60 @@ namespace ErrorCodes } +namespace +{ + +class AzureIteratorAsync final : public IObjectStorageIteratorAsync +{ +public: + AzureIteratorAsync( + const std::string & path_prefix, + std::shared_ptr client_, + size_t max_list_size) + : IObjectStorageIteratorAsync( + CurrentMetrics::ObjectStorageAzureThreads, + CurrentMetrics::ObjectStorageAzureThreadsActive, + "ListObjectAzure") + , client(client_) + { + + options.Prefix = path_prefix; + options.PageSizeHint = static_cast(max_list_size); + } + +private: + bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) override + { + auto outcome = client->ListBlobs(options); + auto blob_list_response = client->ListBlobs(options); + auto blobs_list = blob_list_response.Blobs; + + for (const auto & blob : blobs_list) + { + batch.emplace_back( + blob.Name, + ObjectMetadata{ + static_cast(blob.BlobSize), + Poco::Timestamp::fromEpochTime( + std::chrono::duration_cast( + blob.Details.LastModified.time_since_epoch()).count()), + {}}); + } + + options.ContinuationToken = blob_list_response.NextPageToken; + if (blob_list_response.HasPage()) + return true; + + return false; + } + + std::shared_ptr client; + Azure::Storage::Blobs::ListBlobsOptions options; +}; + +} + + AzureObjectStorage::AzureObjectStorage( const String & name_, AzureClientPtr && client_, @@ -67,6 +128,14 @@ bool AzureObjectStorage::exists(const StoredObject & object) const return false; } +ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix) const +{ + auto settings_ptr = settings.get(); + auto client_ptr = client.get(); + + return std::make_shared(path_prefix, client_ptr, settings_ptr->list_object_keys_size); +} + void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const { auto client_ptr = client.get(); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index a36a03bcda4..5b08ceb80e3 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -60,6 +60,8 @@ public: void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override; + ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const override; + DataSourceDescription getDataSourceDescription() const override { return data_source_description; } std::string getName() const override { return "AzureObjectStorage"; } diff --git a/src/Disks/ObjectStorages/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp index a5903f9d429..ea22294224c 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -5,6 +5,7 @@ #include #include #include 
+#include namespace DB @@ -29,6 +30,14 @@ void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata } +ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix) const +{ + RelativePathsWithMetadata files; + listObjects(path_prefix, files, 0); + + return std::make_shared(std::move(files)); +} + std::optional IObjectStorage::tryGetObjectMetadata(const std::string & path) const { try diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 28de80a88cd..32f9d1ba764 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -20,6 +20,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -51,6 +54,8 @@ struct RelativePathWithMetadata using RelativePathsWithMetadata = std::vector; +class IObjectStorageIterator; +using ObjectStorageIteratorPtr = std::shared_ptr; /// Base class for all object storages which implement some subset of ordinary filesystem operations. /// @@ -75,6 +80,8 @@ public: virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const; + virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const; + /// Get object metadata if supported. It should be possible to receive /// at least size of object virtual std::optional tryGetObjectMetadata(const std::string & path) const; diff --git a/src/Disks/ObjectStorages/ObjectStorageIterator.cpp b/src/Disks/ObjectStorages/ObjectStorageIterator.cpp new file mode 100644 index 00000000000..188b743958c --- /dev/null +++ b/src/Disks/ObjectStorages/ObjectStorageIterator.cpp @@ -0,0 +1,20 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +RelativePathWithMetadata ObjectStorageIteratorFromList::current() const +{ + if (!isValid()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator"); + + return *batch_iterator; +} + +} diff --git a/src/Disks/ObjectStorages/ObjectStorageIterator.h b/src/Disks/ObjectStorages/ObjectStorageIterator.h new file mode 100644 index 00000000000..c3afd395a74 --- /dev/null +++ b/src/Disks/ObjectStorages/ObjectStorageIterator.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class IObjectStorageIterator +{ +public: + virtual void next() = 0; + virtual bool isValid() const = 0; + virtual RelativePathWithMetadata current() const = 0; + virtual size_t getAccumulatedSize() const = 0; + + virtual ~IObjectStorageIterator() = default; +}; + +using ObjectStorageIteratorPtr = std::shared_ptr; + +class ObjectStorageIteratorFromList : public IObjectStorageIterator +{ +public: + explicit ObjectStorageIteratorFromList(RelativePathsWithMetadata && batch_) + : batch(std::move(batch_)) + , batch_iterator(batch.begin()) + { + } + + void next() override + { + if (isValid()) + ++batch_iterator; + } + + bool isValid() const override + { + return batch_iterator != batch.end(); + } + + RelativePathWithMetadata current() const override; + + size_t getAccumulatedSize() const override + { + return batch.size(); + } +private: + RelativePathsWithMetadata batch; + RelativePathsWithMetadata::iterator batch_iterator; +}; + +} diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp new file mode 100644 index 00000000000..766071cf815 --- /dev/null +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -0,0 +1,64 @@ +#include + +namespace DB 
+{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +void IObjectStorageIteratorAsync::next() +{ + std::lock_guard lock(mutex); + + if (current_batch_iterator != current_batch.end()) + { + ++current_batch_iterator; + } + else if (!is_finished) + { + if (outcome_future.valid()) + { + BatchAndHasNext next_batch = outcome_future.get(); + current_batch = std::move(next_batch.batch); + accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed); + current_batch_iterator = current_batch.begin(); + if (next_batch.has_next) + outcome_future = scheduleBatch(); + else + is_finished = true; + } + } +} + +std::future IObjectStorageIteratorAsync::scheduleBatch() +{ + return list_objects_scheduler([this] + { + BatchAndHasNext result; + result.has_next = getBatchAndCheckNext(result.batch); + return result; + }, Priority{}); +} + + +bool IObjectStorageIteratorAsync::isValid() const +{ + return current_batch_iterator != current_batch.end(); +} + +RelativePathWithMetadata IObjectStorageIteratorAsync::current() const +{ + if (!isValid()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator"); + + return *current_batch_iterator; +} + +size_t IObjectStorageIteratorAsync::getAccumulatedSize() const +{ + return accumulated_size.load(std::memory_order_relaxed); +} + +} diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h new file mode 100644 index 00000000000..81ba9bce137 --- /dev/null +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -0,0 +1,58 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class IObjectStorageIteratorAsync : public IObjectStorageIterator +{ +public: + IObjectStorageIteratorAsync( + CurrentMetrics::Metric threads_metric, + CurrentMetrics::Metric threads_active_metric, + const std::string & thread_name) + : list_objects_pool(threads_metric, threads_active_metric, 1) + , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, thread_name)) + { + } + + void next() override; + bool isValid() const override; + RelativePathWithMetadata current() const override; + size_t getAccumulatedSize() const override; + + ~IObjectStorageIteratorAsync() override + { + list_objects_pool.wait(); + } + +protected: + + virtual bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) = 0; + + struct BatchAndHasNext + { + RelativePathsWithMetadata batch; + bool has_next; + }; + + std::future scheduleBatch(); + + bool is_finished{false}; + + std::mutex mutex; + ThreadPool list_objects_pool; + ThreadPoolCallbackRunner list_objects_scheduler; + std::future outcome_future; + RelativePathsWithMetadata current_batch; + RelativePathsWithMetadata::iterator current_batch_iterator; + std::atomic accumulated_size = 0; +}; + + +} diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 6e63efcc1e3..d19be20f920 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -3,6 +3,7 @@ #if USE_AWS_S3 #include +#include #include #include @@ -33,6 +34,13 @@ namespace ProfileEvents extern const Event DiskS3ListObjects; } +namespace CurrentMetrics +{ + extern const Metric ObjectStorageS3Threads; + extern const Metric ObjectStorageS3ThreadsActive; +} + + namespace DB { @@ -84,6 +92,62 @@ void logIfError(const Aws::Utils::Outcome & response, std::functi } +namespace +{ + +class S3IteratorAsync final : public 
IObjectStorageIteratorAsync +{ +public: + S3IteratorAsync( + const std::string & bucket, + const std::string & path_prefix, + std::shared_ptr client_, + size_t max_list_size) + : IObjectStorageIteratorAsync( + CurrentMetrics::ObjectStorageS3Threads, + CurrentMetrics::ObjectStorageS3ThreadsActive, + "ListObjectS3") + , client(client_) + { + request.SetBucket(bucket); + request.SetPrefix(path_prefix); + request.SetMaxKeys(static_cast(max_list_size)); + } + +private: + bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) override + { + ProfileEvents::increment(ProfileEvents::S3ListObjects); + + bool result = false; + auto outcome = client->ListObjectsV2(request); + /// Outcome failure will be handled on the caller side. + if (outcome.IsSuccess()) + { + auto objects = outcome.GetResult().GetContents(); + + result = !objects.empty(); + + for (const auto & object : objects) + batch.emplace_back(object.GetKey(), ObjectMetadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}}); + + if (result) + request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); + + return result; + } + + throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", + quoteString(request.GetBucket()), quoteString(request.GetPrefix()), + backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); + } + + std::shared_ptr client; + S3::ListObjectsV2Request request; +}; + +} + bool S3ObjectStorage::exists(const StoredObject & object) const { auto settings_ptr = s3_settings.get(); @@ -183,6 +247,15 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN disk_write_settings); } + +ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix) const +{ + auto settings_ptr = s3_settings.get(); + auto client_ptr = client.get(); + + return std::make_shared(bucket, path_prefix, client_ptr, settings_ptr->list_object_keys_size); +} + void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const { auto settings_ptr = s3_settings.get(); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index b0eb01aec0d..072e1354d38 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -102,6 +102,8 @@ public: void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override; + ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const override; + /// Uses `DeleteObjectRequest`. void removeObject(const StoredObject & object) override; From 4bb44c7c72417a6e7a5f2ec7e1651b4360f9956e Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Sun, 4 Jun 2023 23:06:21 +0800 Subject: [PATCH 37/64] 1. skip extract darwin toolchain in builder when uncessary 2. 
update MacOSX SDK version in toolchain readme to match in builder --- cmake/toolchain/darwin-x86_64/README.txt | 4 ++-- docker/packager/binary/build.sh | 8 +++++--- docker/packager/packager | 2 ++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cmake/toolchain/darwin-x86_64/README.txt b/cmake/toolchain/darwin-x86_64/README.txt index 65c9aba5be6..90ada960bfa 100644 --- a/cmake/toolchain/darwin-x86_64/README.txt +++ b/cmake/toolchain/darwin-x86_64/README.txt @@ -1,2 +1,2 @@ -wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz -tar xJf MacOSX10.15.sdk.tar.xz --strip-components=1 +wget https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz +tar xJf MacOSX11.0.sdk.tar.xz --strip-components=1 diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 2cd0a011013..ee1011a9cd5 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -11,9 +11,11 @@ ccache_status () { [ -O /build ] || git config --global --add safe.directory /build -mkdir -p /build/cmake/toolchain/darwin-x86_64 -tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1 -ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64 +if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ];then + mkdir -p /build/cmake/toolchain/darwin-x86_64 + tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1 + ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64 +fi # Uncomment to debug ccache. Don't put ccache log in /output right away, or it # will be confusingly packed into the "performance" package. diff --git a/docker/packager/packager b/docker/packager/packager index a894fe2d8e9..1b3df858cd2 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -167,6 +167,7 @@ def parse_env_variables( cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake" ) + result.append("EXTRACT_TOOLCHAIN_DARWIN=1") elif is_cross_darwin_arm: cc = compiler[: -len(DARWIN_ARM_SUFFIX)] cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar") @@ -181,6 +182,7 @@ def parse_env_variables( cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake" ) + result.append("EXTRACT_TOOLCHAIN_DARWIN=1") elif is_cross_arm: cc = compiler[: -len(ARM_SUFFIX)] cmake_flags.append( From e5c95add52ed86c56249fe85d8f7c02132736ae3 Mon Sep 17 00:00:00 2001 From: auxten Date: Mon, 5 Jun 2023 08:43:55 +0800 Subject: [PATCH 38/64] use old_size Co-authored-by: Alexey Milovidov --- src/IO/WriteBufferFromVector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromVector.h b/src/IO/WriteBufferFromVector.h index c793a34b406..a2ecc34f1ab 100644 --- a/src/IO/WriteBufferFromVector.h +++ b/src/IO/WriteBufferFromVector.h @@ -86,7 +86,7 @@ private: size_t old_size = vector.size(); /// pos may not be equal to vector.data() + old_size, because WriteBuffer::next() can be used to flush data size_t pos_offset = pos - reinterpret_cast(vector.data()); - if (pos_offset == vector.size()) + if (pos_offset == old_size) { vector.resize(old_size * size_multiplier); } From 4234c4f36addd2607ecc16131ec67ef1089d10ee Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Jun 2023 02:51:11 +0200 Subject: [PATCH 39/64] Remove flaky test --- tests/integration/test_merge_tree_s3/test.py | 25 -------------------- 1 file changed, 25 deletions(-) diff --git 
a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 7730bfcf7b2..2ccd517923a 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -739,31 +739,6 @@ def test_cache_with_full_disk_space(cluster, node_name): check_no_objects_after_drop(cluster, node_name=node_name) -@pytest.mark.parametrize("node_name", ["node"]) -def test_store_cleanup_disk_s3(cluster, node_name): - node = cluster.instances[node_name] - node.query("DROP TABLE IF EXISTS s3_test SYNC") - node.query( - "CREATE TABLE s3_test UUID '00000000-1000-4000-8000-000000000001' (n UInt64) Engine=MergeTree() ORDER BY n SETTINGS storage_policy='s3';" - ) - node.query("INSERT INTO s3_test SELECT 1") - - node.stop_clickhouse(kill=True) - path_to_data = "/var/lib/clickhouse/" - node.exec_in_container(["rm", f"{path_to_data}/metadata/default/s3_test.sql"]) - node.start_clickhouse() - - node.wait_for_log_line( - "Removing unused directory", timeout=90, look_behind_lines=1000 - ) - node.wait_for_log_line("directories from store") - node.query( - "CREATE TABLE s3_test UUID '00000000-1000-4000-8000-000000000001' (n UInt64) Engine=MergeTree() ORDER BY n SETTINGS storage_policy='s3';" - ) - node.query("INSERT INTO s3_test SELECT 1") - check_no_objects_after_drop(cluster) - - @pytest.mark.parametrize("node_name", ["node"]) def test_cache_setting_compatibility(cluster, node_name): node = cluster.instances[node_name] From 47379ac03965f4834bf6aaa00ce777dec731a3c9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Jun 2023 03:58:42 +0300 Subject: [PATCH 40/64] Update build.sh --- docker/packager/binary/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index ee1011a9cd5..c0803c74147 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -11,7 +11,7 @@ ccache_status () { [ -O /build ] || git config --global --add safe.directory /build -if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ];then +if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ]; then mkdir -p /build/cmake/toolchain/darwin-x86_64 tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1 ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64 From c0f162c5b67b112691b8ef805bf2a56060441a0b Mon Sep 17 00:00:00 2001 From: johanngan Date: Fri, 26 May 2023 15:56:40 -0500 Subject: [PATCH 41/64] Add dictGetAll function for RegExpTreeDictionary This function outputs an array of attribute values from all regexp nodes that matched in a regexp tree dictionary. An optional final argument can be passed to limit the array size. 
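For illustration, a minimal call shape (the dictionary, attribute and key names
below are the ones used in the documentation and tests added by this patch and
are purely illustrative):

```sql
-- 'regexp_dict' is a regexp tree dictionary with a String attribute 'tag';
-- dictGetAll returns Array(String), one element per matched node.
SELECT dictGetAll('regexp_dict', 'tag', 'foobarbaz');     -- ['foo_attr','bar_attr','baz_attr']
SELECT dictGetAll('regexp_dict', 'tag', 'foobarbaz', 2);  -- optional limit: at most 2 values per key
```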
--- docs/en/sql-reference/dictionaries/index.md | 63 ++++++++- .../functions/ext-dict-functions.md | 78 +++++++++++ src/Dictionaries/IDictionary.h | 45 +++++++ src/Dictionaries/RegExpTreeDictionary.cpp | 122 +++++++++++++++--- src/Dictionaries/RegExpTreeDictionary.h | 49 ++++++- .../FunctionsExternalDictionaries.cpp | 15 +++ src/Functions/FunctionsExternalDictionaries.h | 77 ++++++++--- ...04_regexp_dictionary_yaml_source.reference | 6 + .../02504_regexp_dictionary_yaml_source.sh | 48 +++++++ 9 files changed, 459 insertions(+), 44 deletions(-) diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index 43e9300c1ae..6c3d80683db 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -2280,7 +2280,7 @@ This config consists of a list of regular expression tree nodes. Each node has t - The value of an attribute may contain **back references**, referring to capture groups of the matched regular expression. In the example, the value of attribute `version` in the first node consists of a back-reference `\1` to capture group `(\d+[\.\d]*)` in the regular expression. Back-reference numbers range from 1 to 9 and are written as `$1` or `\1` (for number 1). The back reference is replaced by the matched capture group during query execution. - **child nodes**: a list of children of a regexp tree node, each of which has its own attributes and (potentially) children nodes. String matching proceeds in a depth-first fashion. If a string matches a regexp node, the dictionary checks if it also matches the nodes' child nodes. If that is the case, the attributes of the deepest matching node are assigned. Attributes of a child node overwrite equally named attributes of parent nodes. The name of child nodes in YAML files can be arbitrary, e.g. `versions` in above example. -Regexp tree dictionaries only allow access using the functions `dictGet` and `dictGetOrDefault`. +Regexp tree dictionaries only allow access using the functions `dictGet`, `dictGetOrDefault`, and `dictGetAll`. Example: @@ -2300,6 +2300,67 @@ In this case, we first match the regular expression `\d+/tclwebkit(?:\d+[\.\d]*) With a powerful YAML configure file, we can use a regexp tree dictionaries as a user agent string parser. We support [uap-core](https://github.com/ua-parser/uap-core) and demonstrate how to use it in the functional test [02504_regexp_dictionary_ua_parser](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh) +#### Collecting Attribute Values + +Sometimes it is useful to return values from multiple regular expressions that matched, rather than just the value of a leaf node. In these cases, the specialized [`dictGetAll`](../../sql-reference/functions/ext-dict-functions.md#dictgetall) function can be used. If a node has an attribute value of type `T`, `dictGetAll` will return an `Array(T)` containing zero or more values. + +By default, the number of matches returned per key is unbounded. A bound can be passed as an optional fourth argument to `dictGetAll`. The array is populated in _topological order_, meaning that child nodes come before parent nodes, and sibling nodes follow the ordering in the source. 
+ +Example: + +```sql +CREATE DICTIONARY regexp_dict +( + regexp String, + tag String, + topological_index Int64, + captured Nullable(String), + parent String +) +PRIMARY KEY(regexp) +SOURCE(YAMLRegExpTree(PATH '/var/lib/clickhouse/user_files/regexp_tree.yaml')) +LAYOUT(regexp_tree) +LIFETIME(0) +``` + +```yaml +# /var/lib/clickhouse/user_files/regexp_tree.yaml +- regexp: 'clickhouse\.com' + tag: 'ClickHouse' + topological_index: 1 + paths: + - regexp: 'clickhouse\.com/docs(.*)' + tag: 'ClickHouse Documentation' + topological_index: 0 + captured: '\1' + parent: 'ClickHouse' + +- regexp: '/docs(/|$)' + tag: 'Documentation' + topological_index: 2 + +- regexp: 'github.com' + tag: 'GitHub' + topological_index: 3 + captured: 'NULL' +``` + +```sql +CREATE TABLE urls (url String) ENGINE=MergeTree ORDER BY url; +INSERT INTO urls VALUES ('clickhouse.com'), ('clickhouse.com/docs/en'), ('github.com/clickhouse/tree/master/docs'); +SELECT url, dictGetAll('regexp_dict', ('tag', 'topological_index', 'captured', 'parent'), url, 2) FROM urls; +``` + +Result: + +```text +┌─url────────────────────────────────────┬─dictGetAll('regexp_dict', ('tag', 'topological_index', 'captured', 'parent'), url, 2)─┐ +│ clickhouse.com │ (['ClickHouse'],[1],[],[]) │ +│ clickhouse.com/docs/en │ (['ClickHouse Documentation','ClickHouse'],[0,1],['/en'],['ClickHouse']) │ +│ github.com/clickhouse/tree/master/docs │ (['Documentation','GitHub'],[2,3],[NULL],[]) │ +└────────────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────┘ +``` + ### Use Regular Expression Tree Dictionary in ClickHouse Cloud Above used `YAMLRegExpTree` source works in ClickHouse Open Source but not in ClickHouse Cloud. To use regexp tree dictionaries in ClickHouse could, first create a regexp tree dictionary from a YAML file locally in ClickHouse Open Source, then dump this dictionary into a CSV file using the `dictionary` table function and the [INTO OUTFILE](../statements/select/into-outfile.md) clause. diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 7d8aa2c0390..284d6d80405 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -403,6 +403,84 @@ SELECT dictGetDescendants('hierarchy_flat_dictionary', number, 1) FROM system.nu └────────────────────────────────────────────────────────────┘ ``` + +## dictGetAll + +Retrieves the attribute values of all nodes that matched each key in a [regular expression tree dictionary](../../sql-reference/dictionaries/index.md#regexp-tree-dictionary). + +Besides returning values of type `Array(T)` instead of `T`, this function behaves similarly to [`dictGet`](#dictget-dictgetordefault-dictgetornull). + +**Syntax** + +``` sql +dictGetAll('dict_name', attr_names, id_expr[, limit]) +``` + +**Arguments** + +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). +- `id_expr` — Key value. 
[Expression](../../sql-reference/syntax.md#syntax-expressions) returning array of dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `limit` - Maximum length for each value array returned. When truncating, child nodes are given precedence over parent nodes, and otherwise the defined list order for the regexp tree dictionary is respected. If unspecified, array length is unlimited. + +**Returned value** + +- If ClickHouse parses the attribute successfully in the attribute’s data type as defined in the dictionary, returns an array of dictionary attribute values that correspond to `id_expr` for each attribute specified by `attr_names`. + +- If there is no key corresponding to `id_expr` in the dictionary, then an empty array is returned. + +ClickHouse throws an exception if it cannot parse the value of the attribute or the value does not match the attribute data type. + +**Example** + +Consider the following regexp tree dictionary: + +```sql +CREATE DICTIONARY regexp_dict +( + regexp String, + tag String +) +PRIMARY KEY(regexp) +SOURCE(YAMLRegExpTree(PATH '/var/lib/clickhouse/user_files/regexp_tree.yaml')) +LAYOUT(regexp_tree) +... +``` + +```yaml +# /var/lib/clickhouse/user_files/regexp_tree.yaml +- regexp: 'foo' + tag: 'foo_attr' +- regexp: 'bar' + tag: 'bar_attr' +- regexp: 'baz' + tag: 'baz_attr' +``` + +Get all matching values: + +```sql +SELECT dictGetAll('regexp_dict', 'tag', 'foobarbaz'); +``` + +```text +┌─dictGetAll('regexp_dict', 'tag', 'foobarbaz')─┐ +│ ['foo_attr','bar_attr','baz_attr'] │ +└───────────────────────────────────────────────┘ +``` + +Get up to 2 matching values: + +```sql +SELECT dictGetAll('regexp_dict', 'tag', 'foobarbaz', 2); +``` + +```text +┌─dictGetAll('regexp_dict', 'tag', 'foobarbaz', 2)─┐ +│ ['foo_attr','bar_attr'] │ +└──────────────────────────────────────────────────┘ +``` + ## Other Functions ClickHouse supports specialized functions that convert dictionary attribute values to a specific data type regardless of the dictionary configuration. 
diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h index ee18e8b9a7e..f1834b4b129 100644 --- a/src/Dictionaries/IDictionary.h +++ b/src/Dictionaries/IDictionary.h @@ -207,6 +207,51 @@ public: return result; } + /** + * Analogous to getColumn, but for dictGetAll + */ + virtual ColumnPtr getColumnAllValues( + const std::string & attribute_name [[maybe_unused]], + const DataTypePtr & result_type [[maybe_unused]], + const Columns & key_columns [[maybe_unused]], + const DataTypes & key_types [[maybe_unused]], + const ColumnPtr & default_values_column [[maybe_unused]], + size_t limit [[maybe_unused]]) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Method getColumnAllValues is not supported for {} dictionary.", + getDictionaryID().getNameForLogs()); + } + + /** + * Analogous to getColumns, but for dictGetAll + */ + virtual Columns getColumnsAllValues( + const Strings & attribute_names, + const DataTypes & result_types, + const Columns & key_columns, + const DataTypes & key_types, + const Columns & default_values_columns, + size_t limit) const + { + size_t attribute_names_size = attribute_names.size(); + + Columns result; + result.reserve(attribute_names_size); + + for (size_t i = 0; i < attribute_names_size; ++i) + { + const auto & attribute_name = attribute_names[i]; + const auto & result_type = result_types[i]; + const auto & default_values_column = default_values_columns[i]; + + result.emplace_back(getColumnAllValues( + attribute_name, result_type, key_columns, key_types, default_values_column, limit)); + } + + return result; + } + /** Subclass must validate key columns and key types and return ColumnUInt8 that * is bitmask representation of is key in dictionary or not. * If key is in dictionary then value of associated row will be 1, otherwise 0. diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 9841cadcdca..8d0af9b0abf 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -70,7 +70,7 @@ namespace explicit StringPiece(int ref_) : ref_num(ref_) {} }; - Field parseStringToField(const String & raw, DataTypePtr data_type) + Field parseStringToField(const String & raw, const DataTypePtr data_type) try { ReadBufferFromString buffer(raw); @@ -419,6 +419,65 @@ RegExpTreeDictionary::RegExpTreeDictionary( calculateBytesAllocated(); } +// Thin wrapper around unordered_map that manages the collection of attribute values subject to the +// behavior specified by collect_values_limit +class RegExpTreeDictionary::AttributeCollector : public std::unordered_map +{ +private: + std::optional collect_values_limit; // std::nullopt means single-value mode, i.e. 
don't collect + size_t n_full_attributes; + +public: + explicit AttributeCollector(std::optional collect_values_limit_) + : collect_values_limit(collect_values_limit_), n_full_attributes(0) + { + } + + constexpr bool collecting() const { return collect_values_limit != std::nullopt; } + + // Add a name-value pair to the collection if there's space + void add(const String & attr_name, Field field) + { + if (collect_values_limit) + { + if (!this->contains(attr_name)) + (*this)[attr_name] = Array(); + + Array & values = (*this)[attr_name].safeGet(); + if (values.size() < *collect_values_limit) + { + values.push_back(std::move(field)); + if (values.size() == *collect_values_limit) + n_full_attributes++; + } + } + else if (!this->contains(attr_name)) + { + (*this)[attr_name] = std::move(field); + n_full_attributes++; + } + } + + // Checks if no more values can be added for a given attribute + inline bool full(const String & attr_name) const + { + if (collect_values_limit) + { + auto it = this->find(attr_name); + if (it == this->end()) + return false; + return it->second.safeGet().size() >= *collect_values_limit; + } + else + { + return this->contains(attr_name); + } + } + + // Returns the number of full attributes + inline size_t attributesFull() const { return n_full_attributes; } +}; + std::pair processBackRefs(const String & data, const re2_st::RE2 & searcher, const std::vector & pieces) { re2_st::StringPiece haystack(data.data(), data.size()); @@ -442,7 +501,7 @@ std::pair processBackRefs(const String & data, const re2_st::RE2 & // The return value means whether we finish collecting. bool RegExpTreeDictionary::setAttributes( UInt64 id, - std::unordered_map & attributes_to_set, + AttributeCollector & attributes_to_set, const String & data, std::unordered_set & visited_nodes, const std::unordered_map & attributes, @@ -451,34 +510,43 @@ bool RegExpTreeDictionary::setAttributes( { if (visited_nodes.contains(id)) - return attributes_to_set.size() == attributes.size(); + return attributes_to_set.attributesFull() == attributes.size(); visited_nodes.emplace(id); const auto & node_attributes = regex_nodes.at(id)->attributes; for (const auto & [name_, value] : node_attributes) { - if (!attributes.contains(name_) || attributes_to_set.contains(name_)) + if (!attributes.contains(name_) || attributes_to_set.full(name_)) continue; + if (value.containsBackRefs()) { auto [updated_str, use_default] = processBackRefs(data, regex_nodes.at(id)->searcher, value.pieces); if (use_default) { - DefaultValueProvider default_value(attributes.at(name_).null_value, defaults.at(name_)); - attributes_to_set[name_] = default_value.getDefaultValue(key_index); + // Back-ref processing failed. + // - If not collecting values, set the default value immediately while we're still on this node. + // Otherwise, a value from a different node could take its place before we set it to the default value post-walk. + // - If collecting values, don't add anything. If we find no other matches for this attribute, + // then we'll set its value to the default Array value later. 
+ if (!attributes_to_set.collecting()) + { + DefaultValueProvider default_value(attributes.at(name_).null_value, defaults.at(name_)); + attributes_to_set.add(name_, default_value.getDefaultValue(key_index)); + } } else - attributes_to_set[name_] = parseStringToField(updated_str, attributes.at(name_).type); + attributes_to_set.add(name_, parseStringToField(updated_str, attributes.at(name_).type)); } else - attributes_to_set[name_] = value.field; + attributes_to_set.add(name_, value.field); } auto parent_id = regex_nodes.at(id)->parent_id; if (parent_id > 0) setAttributes(parent_id, attributes_to_set, data, visited_nodes, attributes, defaults, key_index); - /// if all the attributes have set, the walking through can be stopped. - return attributes_to_set.size() == attributes.size(); + /// if all attributes are full, we can stop walking the tree + return attributes_to_set.attributesFull() == attributes.size(); } /// a temp struct to store all the matched result. @@ -550,7 +618,8 @@ std::unordered_map RegExpTreeDictionary::match( const ColumnString::Chars & keys_data, const ColumnString::Offsets & keys_offsets, const std::unordered_map & attributes, - const std::unordered_map & defaults) const + const std::unordered_map & defaults, + std::optional collect_values_limit) const { #if USE_VECTORSCAN @@ -573,7 +642,7 @@ std::unordered_map RegExpTreeDictionary::match( /// initialize columns for (const auto & [name_, attr] : attributes) { - auto col_ptr = attr.type->createColumn(); + auto col_ptr = (collect_values_limit ? std::make_shared(attr.type) : attr.type)->createColumn(); col_ptr->reserve(keys_offsets.size()); columns[name_] = std::move(col_ptr); } @@ -630,11 +699,11 @@ std::unordered_map RegExpTreeDictionary::match( match_result.sort(); /// Walk through the regex tree util all attributes are set; - std::unordered_map attributes_to_set; + AttributeCollector attributes_to_set{collect_values_limit}; std::unordered_set visited_nodes; /// Some node matches but its parents cannot match. In this case we must regard this node unmatched. - auto is_invalid = [&](UInt64 id) + auto is_valid = [&](UInt64 id) { while (id) { @@ -650,7 +719,7 @@ std::unordered_map RegExpTreeDictionary::match( for (auto item : match_result.matched_idx_sorted_list) { UInt64 id = item.second; - if (!is_invalid(id)) + if (!is_valid(id)) continue; if (visited_nodes.contains(id)) continue; @@ -663,7 +732,8 @@ std::unordered_map RegExpTreeDictionary::match( if (attributes_to_set.contains(name_)) continue; - DefaultValueProvider default_value(attr.null_value, defaults.at(name_)); + DefaultValueProvider default_value( + collect_values_limit ? 
DataTypeArray(attr.type).getDefault() : attr.null_value, defaults.at(name_)); columns[name_]->insert(default_value.getDefaultValue(key_idx)); } @@ -727,12 +797,13 @@ Pipe RegExpTreeDictionary::read(const Names & , size_t max_block_size, size_t) c return Pipe(std::make_shared(std::move(result))); } -Columns RegExpTreeDictionary::getColumns( +Columns RegExpTreeDictionary::getColumnsImpl( const Strings & attribute_names, const DataTypes & result_types, const Columns & key_columns, const DataTypes & key_types, - const Columns & default_values_columns) const + const Columns & default_values_columns, + std::optional collect_values_limit) const { /// valid check if (key_columns.size() != 1) @@ -746,7 +817,17 @@ Columns RegExpTreeDictionary::getColumns( for (size_t i = 0; i < attribute_names.size(); i++) { - const auto & attribute = structure.getAttribute(attribute_names[i], result_types[i]); + DataTypePtr attribute_type = result_types[i]; + if (collect_values_limit) + { + if (!WhichDataType(attribute_type).isArray()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Expected Array result type for attribute `{}`, got `{}`", + attribute_names[i], + attribute_type->getName()); + attribute_type = assert_cast(*attribute_type).getNestedType(); + } + const auto & attribute = structure.getAttribute(attribute_names[i], attribute_type); attributes.emplace(attribute.name, attribute); defaults[attribute.name] = default_values_columns[i]; } @@ -757,7 +838,8 @@ Columns RegExpTreeDictionary::getColumns( key_column->getChars(), key_column->getOffsets(), attributes, - defaults); + defaults, + collect_values_limit); Columns result; for (const String & name_ : attribute_names) diff --git a/src/Dictionaries/RegExpTreeDictionary.h b/src/Dictionaries/RegExpTreeDictionary.h index 683588e688f..30966184eb6 100644 --- a/src/Dictionaries/RegExpTreeDictionary.h +++ b/src/Dictionaries/RegExpTreeDictionary.h @@ -101,16 +101,50 @@ public: const Columns & key_columns, const DataTypes & key_types, const ColumnPtr & default_values_column) const override - { - return getColumns(Strings({attribute_name}), DataTypes({result_type}), key_columns, key_types, Columns({default_values_column}))[0]; - } + { + return getColumns(Strings({attribute_name}), DataTypes({result_type}), key_columns, key_types, Columns({default_values_column}))[0]; + } Columns getColumns( const Strings & attribute_names, const DataTypes & result_types, const Columns & key_columns, const DataTypes & key_types, - const Columns & default_values_columns) const override; + const Columns & default_values_columns) const override + { + return getColumnsImpl(attribute_names, result_types, key_columns, key_types, default_values_columns, std::nullopt); + } + + ColumnPtr getColumnAllValues( + const std::string & attribute_name, + const DataTypePtr & result_type, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnPtr & default_values_column, + size_t limit) const override + { + return getColumnsAllValues( + Strings({attribute_name}), DataTypes({result_type}), key_columns, key_types, Columns({default_values_column}), limit)[0]; + } + + Columns getColumnsAllValues( + const Strings & attribute_names, + const DataTypes & result_types, + const Columns & key_columns, + const DataTypes & key_types, + const Columns & default_values_columns, + size_t limit) const override + { + return getColumnsImpl(attribute_names, result_types, key_columns, key_types, default_values_columns, limit); + } + + Columns getColumnsImpl( + const Strings & attribute_names, + const 
DataTypes & result_types, + const Columns & key_columns, + const DataTypes & key_types, + const Columns & default_values_columns, + std::optional collect_values_limit) const; private: const DictionaryStructure structure; @@ -137,11 +171,14 @@ private: const ColumnString::Chars & keys_data, const ColumnString::Offsets & keys_offsets, const std::unordered_map & attributes, - const std::unordered_map & defaults) const; + const std::unordered_map & defaults, + std::optional collect_values_limit) const; + + class AttributeCollector; bool setAttributes( UInt64 id, - std::unordered_map & attributes_to_set, + AttributeCollector & attributes_to_set, const String & data, std::unordered_set & visited_nodes, const std::unordered_map & attributes, diff --git a/src/Functions/FunctionsExternalDictionaries.cpp b/src/Functions/FunctionsExternalDictionaries.cpp index 70b1e3cc861..9fa08c82d41 100644 --- a/src/Functions/FunctionsExternalDictionaries.cpp +++ b/src/Functions/FunctionsExternalDictionaries.cpp @@ -45,11 +45,26 @@ Accepts 3 parameters: Returned value: value of the dictionary attribute parsed in the attribute’s data type if key is found, otherwise NULL. Throws an exception if cannot parse the value of the attribute or the value does not match the attribute data type. +)" }; + + constexpr auto dict_get_all_description { R"( +Retrieves all values from a dictionary corresponding to the given key values. + +Accepts 3 or 4 parameters: +-- name of the dictionary; +-- name of the column of the dictionary or tuple of column names; +-- key value - expression returning dictionary key-type value or tuple-type value - depending on the dictionary configuration; +-- [optional] maximum number of values to return for each attribute; + +Returned value: array of dictionary attribute values parsed in the attribute's data type if key is found, otherwise empty array. + +Throws an exception if cannot parse the value of the attribute, the value does not match the attribute data type, or the dictionary doesn't support this function. )" }; factory.registerFunction>(FunctionDocumentation{ .description=fmt::format(dict_get_description, "attribute’s data type") }); factory.registerFunction>(FunctionDocumentation{ .description=fmt::format(dict_get_or_default_description, "attribute’s data type") }); factory.registerFunction(FunctionDocumentation{ .description=dict_get_or_null_description }); + factory.registerFunction>(FunctionDocumentation{ .description=dict_get_all_description }); factory.registerFunction(FunctionDocumentation{ .description=fmt::format(dict_get_description, "UInt8") }); factory.registerFunction(FunctionDocumentation{ .description=fmt::format(dict_get_description, "UInt16") }); diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 97d85f384bc..e4529ff1765 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -296,7 +296,8 @@ private: enum class DictionaryGetFunctionType { get, - getOrDefault + getOrDefault, + getAll }; /// This variant of function derives the result type automatically. @@ -304,7 +305,10 @@ template class FunctionDictGetNoType final : public IFunction { public: - static constexpr auto name = dictionary_get_function_type == DictionaryGetFunctionType::get ? 
"dictGet" : "dictGetOrDefault"; + // Kind of gross but we need a static field called "name" for FunctionFactory::registerFunction, and this is the easiest way + static constexpr auto name = (dictionary_get_function_type == DictionaryGetFunctionType::get) + ? "dictGet" + : ((dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault) ? "dictGetOrDefault" : "dictGetAll"); static FunctionPtr create(ContextPtr context) { @@ -321,7 +325,13 @@ public: bool useDefaultImplementationForConstants() const final { return true; } bool useDefaultImplementationForNulls() const final { return false; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const final + { + if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::getAll) + return {0, 1, 3}; + else + return {0, 1}; + } bool isDeterministic() const override { return false; } @@ -360,6 +370,15 @@ public: } bool key_is_nullable = arguments[2].type->isNullable(); + if (dictionary_get_function_type == DictionaryGetFunctionType::getAll) + { + if (key_is_nullable) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Function {} does not support nullable keys", getName()); + + // Wrap all the attribute types in Array() + for (auto it = attribute_types.begin(); it != attribute_types.end(); ++it) + *it = std::make_shared(*it); + } if (attribute_types.size() > 1) { if (key_is_nullable) @@ -424,6 +443,7 @@ public: } Columns default_cols; + size_t collect_values_limit = std::numeric_limits::max(); if (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault) { @@ -464,6 +484,19 @@ public: } else { + if (dictionary_get_function_type == DictionaryGetFunctionType::getAll && current_arguments_index < arguments.size()) + { + auto limit_col = arguments[current_arguments_index].column; + if (!limit_col || !isColumnConst(*limit_col)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of fourth argument of function {}. 
Expected const unsigned integer.", + arguments[current_arguments_index].type->getName(), + getName()); + + collect_values_limit = limit_col->getUInt(0); + ++current_arguments_index; + } + for (size_t i = 0; i < attribute_names.size(); ++i) default_cols.emplace_back(nullptr); } @@ -549,7 +582,8 @@ public: attribute_type = attribute_types.front(); } - auto result_column = executeDictionaryRequest(dictionary, attribute_names, key_columns, key_types, attribute_type, default_cols); + auto result_column = executeDictionaryRequest( + dictionary, attribute_names, key_columns, key_types, attribute_type, default_cols, collect_values_limit); if (key_is_nullable) result_column = wrapInNullable(result_column, {arguments[2]}, result_type, input_rows_count); @@ -565,7 +599,8 @@ private: const Columns & key_columns, const DataTypes & key_types, const DataTypePtr & result_type, - const Columns & default_cols) const + const Columns & default_cols, + size_t collect_values_limit) const { ColumnPtr result; @@ -573,23 +608,31 @@ private: { const auto & result_tuple_type = assert_cast(*result_type); - Columns result_columns = dictionary->getColumns( - attribute_names, - result_tuple_type.getElements(), - key_columns, - key_types, - default_cols); + Columns result_columns; + if (dictionary_get_function_type == DictionaryGetFunctionType::getAll) + { + result_columns = dictionary->getColumnsAllValues( + attribute_names, result_tuple_type.getElements(), key_columns, key_types, default_cols, collect_values_limit); + } + else + { + result_columns + = dictionary->getColumns(attribute_names, result_tuple_type.getElements(), key_columns, key_types, default_cols); + } result = ColumnTuple::create(std::move(result_columns)); } else { - result = dictionary->getColumn( - attribute_names[0], - result_type, - key_columns, - key_types, - default_cols.front()); + if (dictionary_get_function_type == DictionaryGetFunctionType::getAll) + { + result = dictionary->getColumnAllValues( + attribute_names[0], result_type, key_columns, key_types, default_cols.front(), collect_values_limit); + } + else + { + result = dictionary->getColumn(attribute_names[0], result_type, key_columns, key_types, default_cols.front()); + } } return result; diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.reference b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.reference index dfcd170e8f4..437012dd516 100644 --- a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.reference +++ b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.reference @@ -5,3 +5,9 @@ ('BlackBerry WebKit','10.0') ('BlackBerry WebKit','1.0') (true,'61f0c404-5cb3-11e7-907b-a6006ad3dba0','2023-01-01','2023-01-01 01:01:01',[1,2,3,-1,-2,-3]) +(['ClickHouse'],[1],[],[]) +(['ClickHouse'],[1],[],[]) +(['ClickHouse Documentation','ClickHouse','Documentation'],[0,1,2],['/en'],['ClickHouse']) +(['ClickHouse Documentation','ClickHouse'],[0,1],['/en'],['ClickHouse']) +(['Documentation','GitHub'],[2,3],[NULL],[]) +(['Documentation','GitHub'],[2,3],[NULL],[]) diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh index 1b5a9cdeea4..ac0793460a9 100755 --- a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh +++ b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh @@ -128,9 +128,57 @@ LAYOUT(regexp_tree); select dictGet('regexp_dict2', ('col_bool','col_uuid', 'col_date', 'col_datetime', 'col_array'), 'abc'); " +cat > "$yaml" 
< Date: Sun, 4 Jun 2023 19:48:14 -0500 Subject: [PATCH 42/64] Review comments: Use constexpr-if in more places Also add a comment about the apparent lack of type checking on the limit column. --- src/Functions/FunctionsExternalDictionaries.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index e4529ff1765..db6529da73c 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -370,7 +370,7 @@ public: } bool key_is_nullable = arguments[2].type->isNullable(); - if (dictionary_get_function_type == DictionaryGetFunctionType::getAll) + if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::getAll) { if (key_is_nullable) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Function {} does not support nullable keys", getName()); @@ -487,6 +487,7 @@ public: if (dictionary_get_function_type == DictionaryGetFunctionType::getAll && current_arguments_index < arguments.size()) { auto limit_col = arguments[current_arguments_index].column; + // The getUInt later attempts to cast and throws on a type mismatch, so skip actual type checking here if (!limit_col || !isColumnConst(*limit_col)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of fourth argument of function {}. Expected const unsigned integer.", @@ -609,7 +610,7 @@ private: const auto & result_tuple_type = assert_cast(*result_type); Columns result_columns; - if (dictionary_get_function_type == DictionaryGetFunctionType::getAll) + if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::getAll) { result_columns = dictionary->getColumnsAllValues( attribute_names, result_tuple_type.getElements(), key_columns, key_types, default_cols, collect_values_limit); @@ -624,7 +625,7 @@ private: } else { - if (dictionary_get_function_type == DictionaryGetFunctionType::getAll) + if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::getAll) { result = dictionary->getColumnAllValues( attribute_names[0], result_type, key_columns, key_types, default_cols.front(), collect_values_limit); From 5e1c93c9c819a1d5819ab742fe4981199c621462 Mon Sep 17 00:00:00 2001 From: johanngan Date: Sun, 4 Jun 2023 21:09:41 -0500 Subject: [PATCH 43/64] Add dictGetAll to spell-check dictionary --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 0455556ae96..d6cef1883f4 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1342,6 +1342,7 @@ detectLanguageUnknown determinator deterministically dictGet +dictGetAll dictGetChildren dictGetDescendant dictGetHierarchy From f1058d2d9d2201f21882b487499ea4f4212fec0b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 5 Jun 2023 09:51:16 +0300 Subject: [PATCH 44/64] Revert "Disable skim (Rust library) under memory sanitizer" --- rust/skim/CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/rust/skim/CMakeLists.txt b/rust/skim/CMakeLists.txt index c2e406ec12f..1e7a43aba7c 100644 --- a/rust/skim/CMakeLists.txt +++ b/rust/skim/CMakeLists.txt @@ -14,11 +14,6 @@ if (OS_FREEBSD) return() endif() -if (SANITIZE STREQUAL "memory") - message(STATUS "skim is disabled under memory sanitizer, because the interop is not instrumented properly") - return() -endif() - 
clickhouse_import_crate(MANIFEST_PATH Cargo.toml) # -Wno-dollar-in-identifier-extension: cxx bridge complies names with '$' From c860db0fb77ad247e39dec10b3419ab1cb2b05e3 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 5 Jun 2023 10:32:46 +0300 Subject: [PATCH 45/64] Fixed tests --- src/Functions/if.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 8d43b3a4ca3..65e2212e894 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -1124,6 +1124,9 @@ public: return {}; const ColumnConst * cond_const_col = checkAndGetColumnConst>(arg_cond.column.get()); + if (!cond_const_col) + return {}; + bool condition_value = cond_const_col->getValue(); const ColumnWithTypeAndName & arg_then = arguments[1]; From 4225cab2e8203b45e17188da28c3f6a1a330878c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 5 Jun 2023 08:34:25 +0000 Subject: [PATCH 46/64] Rewrite bugprone shell script command --- tests/queries/0_stateless/02771_system_user_processes.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02771_system_user_processes.sh b/tests/queries/0_stateless/02771_system_user_processes.sh index 910af4be9e2..f0e5b2a6987 100755 --- a/tests/queries/0_stateless/02771_system_user_processes.sh +++ b/tests/queries/0_stateless/02771_system_user_processes.sh @@ -7,7 +7,13 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) USER_POSTFIX=`random_str 10` USER="test_user_02771_$USER_POSTFIX" -$CLICKHOUSE_CLIENT -q "SHOW USER PROCESSES" &>"${CLICKHOUSE_TMP}/test_output" && echo "SHOW USER PROCESSES query succeeded!" || cat "${CLICKHOUSE_TMP}/test_output" +if $CLICKHOUSE_CLIENT -q "SHOW USER PROCESSES" &>"${CLICKHOUSE_TMP}/test_output" +then + echo "SHOW USER PROCESSES query succeeded!" 
+else + cat "${CLICKHOUSE_TMP}/test_output" +fi + $CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS $USER" $CLICKHOUSE_CLIENT -q "CREATE USER $USER" $CLICKHOUSE_CLIENT -u "$USER" -q "SELECT * FROM system.numbers LIMIT 1" From 3082029406846dd92f1d9018156fc0bf9fee8d7c Mon Sep 17 00:00:00 2001 From: ismailakpolat Date: Mon, 5 Jun 2023 13:03:17 +0300 Subject: [PATCH 47/64] Update rabbitmq.md Duplicate parameter name in definition --- docs/en/engines/table-engines/integrations/rabbitmq.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index 08062278904..7620cd22767 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -42,7 +42,6 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [rabbitmq_queue_consume = false,] [rabbitmq_address = '',] [rabbitmq_vhost = '/',] - [rabbitmq_queue_consume = false,] [rabbitmq_username = '',] [rabbitmq_password = '',] [rabbitmq_commit_on_select = false,] From a224c8936ccebc243ab82b7338ddd93a10f3c099 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 5 Jun 2023 10:18:07 +0000 Subject: [PATCH 48/64] Fix minor issues in documentation --- docs/en/operations/system-tables/processes.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/system-tables/processes.md b/docs/en/operations/system-tables/processes.md index 2e729920ed0..ffa37357053 100644 --- a/docs/en/operations/system-tables/processes.md +++ b/docs/en/operations/system-tables/processes.md @@ -10,14 +10,14 @@ Columns: - `user` (String) – The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the `default` user. The field contains the username for a specific query, not for a query that this query initiated. - `address` (String) – The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at `system.processes` on the query requestor server. - `elapsed` (Float64) – The time in seconds since request execution started. -- `rows_read` (UInt64) – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. -- `bytes_read` (UInt64) – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. +- `read_rows` (UInt64) – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. +- `read_bytes` (UInt64) – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. - `total_rows_approx` (UInt64) – The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known. -- `memory_usage` (UInt64) – Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting. +- `memory_usage` (Int64) – Amount of RAM the request uses. It might not include some types of dedicated memory. 
See the [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting. - `query` (String) – The query text. For `INSERT`, it does not include the data to insert. - `query_id` (String) – Query ID, if defined. -- `is_cancelled` (Int8) – Was query cancelled. -- `is_all_data_sent` (Int8) – Was all data sent to the client (in other words query had been finished on the server). +- `is_cancelled` (UInt8) – Was query cancelled. +- `is_all_data_sent` (UInt8) – Was all data sent to the client (in other words query had been finished on the server). ```sql SELECT * FROM system.processes LIMIT 10 FORMAT Vertical; From 256f713d6b75bf971203f24bb6db0fcab6fa9aec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 5 Jun 2023 10:18:25 +0000 Subject: [PATCH 49/64] Add docs for `system.user_processes` --- .../system-tables/user_processes.md | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 docs/en/operations/system-tables/user_processes.md diff --git a/docs/en/operations/system-tables/user_processes.md b/docs/en/operations/system-tables/user_processes.md new file mode 100644 index 00000000000..a9b97390ed6 --- /dev/null +++ b/docs/en/operations/system-tables/user_processes.md @@ -0,0 +1,28 @@ +--- +slug: /en/operations/system-tables/user_processes +--- +# user_processes + +This system table is used for implementing the `SHOW USER PROCESSES` query. + +Columns: + +- `user` ([String](../../sql-reference/data-types/string.md)) — User name. +- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint#int-ranges)) – Sum of RAM used by all processes of the user. It might not include some types of dedicated memory. See the [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint#int-ranges)) — The peak of memory usage of the user. It can be reset when no queries are run for the user. +- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map)) – Summary of ProfileEvents that measure different metrics for the user. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events) + +```sql +SELECT * FROM system.user_processes LIMIT 10 FORMAT Vertical; +``` + +```response +Row 1: +────── +user: default +memory_usage: 9832 +peak_memory_usage: 9832 +ProfileEvents: {'Query':5,'SelectQuery':5,'QueriesWithSubqueries':38,'SelectQueriesWithSubqueries':38,'QueryTimeMicroseconds':842048,'SelectQueryTimeMicroseconds':842048,'ReadBufferFromFileDescriptorRead':6,'ReadBufferFromFileDescriptorReadBytes':234,'IOBufferAllocs':3,'IOBufferAllocBytes':98493,'ArenaAllocChunks':283,'ArenaAllocBytes':1482752,'FunctionExecute':670,'TableFunctionExecute':16,'DiskReadElapsedMicroseconds':19,'NetworkSendElapsedMicroseconds':684,'NetworkSendBytes':139498,'SelectedRows':6076,'SelectedBytes':685802,'ContextLock':1140,'RWLockAcquiredReadLocks':193,'RWLockReadersWaitMilliseconds':4,'RealTimeMicroseconds':1585163,'UserTimeMicroseconds':889767,'SystemTimeMicroseconds':13630,'SoftPageFaults':1947,'OSCPUWaitMicroseconds':6,'OSCPUVirtualTimeMicroseconds':903251,'OSReadChars':28631,'OSWriteChars':28888,'QueryProfilerRuns':3,'LogTrace':79,'LogDebug':24} + +1 row in set. Elapsed: 0.010 sec. 
+``` From e140cad10c0e0ab1a3a291b0ceef37a72bcb4295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 5 Jun 2023 10:18:36 +0000 Subject: [PATCH 50/64] Clean up includes --- src/Storages/System/StorageSystemUserProcesses.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Storages/System/StorageSystemUserProcesses.cpp b/src/Storages/System/StorageSystemUserProcesses.cpp index 5973f9e2af3..de34fede0ac 100644 --- a/src/Storages/System/StorageSystemUserProcesses.cpp +++ b/src/Storages/System/StorageSystemUserProcesses.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -10,8 +9,6 @@ #include #include #include -#include -#include namespace DB From f3a8517a447daeafd6c5bad0b819a2122cf6161c Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 5 Jun 2023 13:07:07 +0200 Subject: [PATCH 51/64] Fix --- src/Interpreters/Cache/FileCache.cpp | 2 +- src/Interpreters/Cache/Metadata.cpp | 10 ++++++++++ src/Interpreters/Cache/Metadata.h | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 65dca790183..5ccbe6ad72d 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -123,7 +123,7 @@ FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment: auto add_to_result = [&](const FileSegmentMetadata & file_segment_metadata) { FileSegmentPtr file_segment; - if (file_segment_metadata.valid()) + if (!file_segment_metadata.evicting()) { file_segment = file_segment_metadata.file_segment; if (file_segment->isDownloaded()) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 843ffd45b63..fea552c4071 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -346,6 +346,16 @@ void LockedKey::removeAllReleasable() ++it; continue; } + else if (it->second.evicting()) + { + /// File segment is currently a removal candidate, + /// we do not know if it will be removed or not yet, + /// but its size is currently accounted as potentially removed, + /// so if we remove file segment now, we break the freeable_count + /// calculation in tryReserve. 
+ ++it; + continue; + } auto file_segment = it->second->file_segment; it = removeFileSegment(file_segment->offset(), file_segment->lock()); diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 2e015b07ed0..4732123fabc 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -22,7 +22,7 @@ struct FileSegmentMetadata : private boost::noncopyable size_t size() const; - bool valid() const { return !removal_candidate.load(); } + bool evicting() const { return !removal_candidate.load(); } Priority::Iterator getQueueIterator() const { return file_segment->getQueueIterator(); } From 1713dbe6318b310ba4414e8f8804fe5e0c8a155f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 5 Jun 2023 12:07:05 +0000 Subject: [PATCH 52/64] Grant select on system tables to test user --- tests/queries/0_stateless/02771_system_user_processes.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02771_system_user_processes.sh b/tests/queries/0_stateless/02771_system_user_processes.sh index f0e5b2a6987..8e2fbfb5287 100755 --- a/tests/queries/0_stateless/02771_system_user_processes.sh +++ b/tests/queries/0_stateless/02771_system_user_processes.sh @@ -16,6 +16,7 @@ fi $CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS $USER" $CLICKHOUSE_CLIENT -q "CREATE USER $USER" +$CLICKHOUSE_CLIENT -q "GRANT SELECT ON system.* TO $USER" $CLICKHOUSE_CLIENT -u "$USER" -q "SELECT * FROM system.numbers LIMIT 1" $CLICKHOUSE_CLIENT -u "$USER" -q "SELECT * FROM system.numbers LIMIT 1" $CLICKHOUSE_CLIENT -q "SELECT user, toBool(ProfileEvents['SelectQuery'] > 0), toBool(ProfileEvents['Query'] > 0) FROM system.user_processes WHERE user='default'" From 90e9df9109433971d70d544e848eeead361b96f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 5 Jun 2023 12:27:46 +0000 Subject: [PATCH 53/64] Revert "Add `SHOW USER PROCESSES` query" This reverts commit d28b4181e94c5602b5512af8ed541dcc2a1a55f2. 
--- src/Interpreters/InterpreterFactory.cpp | 6 ---- .../InterpreterShowUserProcessesQuery.cpp | 18 ----------- .../InterpreterShowUserProcessesQuery.h | 30 ----------------- src/Parsers/ASTShowUserProcessesQuery.h | 17 ---------- src/Parsers/ParserQueryWithOutput.cpp | 5 +-- src/Parsers/ParserShowUserProcessesQuery.h | 32 ------------------- 6 files changed, 1 insertion(+), 107 deletions(-) delete mode 100644 src/Interpreters/InterpreterShowUserProcessesQuery.cpp delete mode 100644 src/Interpreters/InterpreterShowUserProcessesQuery.h delete mode 100644 src/Parsers/ASTShowUserProcessesQuery.h delete mode 100644 src/Parsers/ParserShowUserProcessesQuery.h diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index c31e3801478..9cd1f2a251c 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -82,7 +81,6 @@ #include #include #include -#include #include #include #include @@ -268,10 +266,6 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } - else if (query->as()) - { - return std::make_unique(query, context); - } else if (query->as()) { return std::make_unique(query, context); diff --git a/src/Interpreters/InterpreterShowUserProcessesQuery.cpp b/src/Interpreters/InterpreterShowUserProcessesQuery.cpp deleted file mode 100644 index 51287a7ad5b..00000000000 --- a/src/Interpreters/InterpreterShowUserProcessesQuery.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include - -#include -#include -#include - -#include - - -namespace DB -{ - -BlockIO InterpreterShowUserProcessesQuery::execute() -{ - return executeQuery("SELECT * FROM system.user_processes ORDER BY user DESC", getContext(), true); -} - -} diff --git a/src/Interpreters/InterpreterShowUserProcessesQuery.h b/src/Interpreters/InterpreterShowUserProcessesQuery.h deleted file mode 100644 index a1c385dc82f..00000000000 --- a/src/Interpreters/InterpreterShowUserProcessesQuery.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -/** Return list of currently executing queries. -TODO(antaljanosbenjamin) - */ -class InterpreterShowUserProcessesQuery : public IInterpreter, WithMutableContext -{ -public: - InterpreterShowUserProcessesQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) - : WithMutableContext(context_), query_ptr(query_ptr_) {} - - BlockIO execute() override; - - /// We ignore the quota and limits here because execute() will rewrite a show query as a SELECT query and then - /// the SELECT query will checks the quota and limits. 
- bool ignoreQuota() const override { return true; } - bool ignoreLimits() const override { return true; } - -private: - ASTPtr query_ptr; -}; - -} diff --git a/src/Parsers/ASTShowUserProcessesQuery.h b/src/Parsers/ASTShowUserProcessesQuery.h deleted file mode 100644 index cd522c152b6..00000000000 --- a/src/Parsers/ASTShowUserProcessesQuery.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ - -struct ASTShowUserProcessesIDAndQueryNames -{ - static constexpr auto ID = "ShowUserProcesses"; - static constexpr auto Query = "SHOW USER PROCESSES"; -}; - -using ASTShowUserProcessesQuery = ASTQueryWithOutputImpl; - -} diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index d5293e5f709..6796f4528c4 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -62,7 +61,6 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserShowGrantsQuery show_grants_p; ParserShowPrivilegesQuery show_privileges_p; ParserExplainQuery explain_p(end, allow_settings_after_format_in_insert); - ParserShowUserProcessesQuery show_user_processes_p; ASTPtr query; @@ -90,8 +88,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec || show_access_p.parse(pos, query, expected) || show_access_entities_p.parse(pos, query, expected) || show_grants_p.parse(pos, query, expected) - || show_privileges_p.parse(pos, query, expected) - || show_user_processes_p.parse(pos, query, expected); + || show_privileges_p.parse(pos, query, expected); if (!parsed) return false; diff --git a/src/Parsers/ParserShowUserProcessesQuery.h b/src/Parsers/ParserShowUserProcessesQuery.h deleted file mode 100644 index be484e74d5d..00000000000 --- a/src/Parsers/ParserShowUserProcessesQuery.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include -#include -#include -#include - - -namespace DB -{ - -/** Query SHOW USER PROCESSES - */ -class ParserShowUserProcessesQuery : public IParserBase -{ -protected: - const char * getName() const override { return "SHOW USER PROCESSES query"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - auto query = std::make_shared(); - - if (!ParserKeyword("SHOW USER PROCESSES").ignore(pos, expected)) - return false; - - node = query; - - return true; - } -}; - -} From 28eb9562b8a8904941a4b59e8ecf1c5b97aa70cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 5 Jun 2023 12:29:11 +0000 Subject: [PATCH 54/64] Remove the usage of `SHOW USER PROCESSES` from tests --- .../0_stateless/02771_system_user_processes.reference | 1 - tests/queries/0_stateless/02771_system_user_processes.sh | 7 ------- 2 files changed, 8 deletions(-) diff --git a/tests/queries/0_stateless/02771_system_user_processes.reference b/tests/queries/0_stateless/02771_system_user_processes.reference index 8c8ca8abb52..a55207ff3f4 100644 --- a/tests/queries/0_stateless/02771_system_user_processes.reference +++ b/tests/queries/0_stateless/02771_system_user_processes.reference @@ -1,4 +1,3 @@ -SHOW USER PROCESSES query succeeded! 
0 0 default true true diff --git a/tests/queries/0_stateless/02771_system_user_processes.sh b/tests/queries/0_stateless/02771_system_user_processes.sh index 8e2fbfb5287..c680283d36e 100755 --- a/tests/queries/0_stateless/02771_system_user_processes.sh +++ b/tests/queries/0_stateless/02771_system_user_processes.sh @@ -7,13 +7,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) USER_POSTFIX=`random_str 10` USER="test_user_02771_$USER_POSTFIX" -if $CLICKHOUSE_CLIENT -q "SHOW USER PROCESSES" &>"${CLICKHOUSE_TMP}/test_output" -then - echo "SHOW USER PROCESSES query succeeded!" -else - cat "${CLICKHOUSE_TMP}/test_output" -fi - $CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS $USER" $CLICKHOUSE_CLIENT -q "CREATE USER $USER" $CLICKHOUSE_CLIENT -q "GRANT SELECT ON system.* TO $USER" From 5cb2d8b4e2849b015682ea05f2105258162f3335 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 5 Jun 2023 12:32:25 +0000 Subject: [PATCH 55/64] Update failing tests --- tests/queries/0_stateless/01399_http_request_headers.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01399_http_request_headers.reference b/tests/queries/0_stateless/01399_http_request_headers.reference index 90a10a9818d..92ea6606a12 100644 --- a/tests/queries/0_stateless/01399_http_request_headers.reference +++ b/tests/queries/0_stateless/01399_http_request_headers.reference @@ -6,6 +6,7 @@ Code: 516 1 Code: 516 processes +processes Code: 81 [1] Code: 73 From c3d6e4c9155b22fe24018ad0099eef2ccd787f5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 5 Jun 2023 12:36:19 +0000 Subject: [PATCH 56/64] Fix docs --- docs/en/operations/system-tables/user_processes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/system-tables/user_processes.md b/docs/en/operations/system-tables/user_processes.md index a9b97390ed6..94c153fb683 100644 --- a/docs/en/operations/system-tables/user_processes.md +++ b/docs/en/operations/system-tables/user_processes.md @@ -3,7 +3,7 @@ slug: /en/operations/system-tables/user_processes --- # user_processes -This system table is used for implementing the `SHOW USER PROCESSES` query. +This system table can be used to get overview of memory usage and ProfileEvents of users. 
Columns: From 2b3db1d33c8be5dcfef3ba189a292d469f0f9676 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 5 Jun 2023 17:05:06 +0200 Subject: [PATCH 57/64] Update Metadata.cpp --- src/Interpreters/Cache/Metadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index fea552c4071..5b6561a665e 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -346,7 +346,7 @@ void LockedKey::removeAllReleasable() ++it; continue; } - else if (it->second.evicting()) + else if (it->second->evicting()) { /// File segment is currently a removal candidate, /// we do not know if it will be removed or not yet, From 3938309374cef05afb618c36d932bd380abb1651 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Mon, 5 Jun 2023 08:18:03 -0700 Subject: [PATCH 58/64] Implement review comments --- .../Serializations/SerializationUUID.cpp | 2 +- src/IO/ReadHelpers.cpp | 27 ++++++++++--------- src/IO/ReadHelpers.h | 1 - src/IO/WriteHelpers.cpp | 25 ++++++++--------- src/IO/WriteHelpers.h | 7 +++-- .../Formats/Impl/AvroRowInputFormat.cpp | 2 +- .../Formats/Impl/AvroRowOutputFormat.cpp | 4 +-- 7 files changed, 35 insertions(+), 33 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index 13313111b2b..76be273d7dc 100644 --- a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -51,7 +51,7 @@ void SerializationUUID::deserializeTextQuoted(IColumn & column, ReadBuffer & ist { assertChar('\'', istr); char * next_pos = find_first_symbols<'\\', '\''>(istr.position(), istr.buffer().end()); - const auto len = next_pos - istr.position(); + const size_t len = next_pos - istr.position(); if ((len == 32 || len == 36) && istr.position()[len] == '\'') { uuid = parseUUID(std::span(reinterpret_cast(istr.position()), len)); diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index a85a057f2b3..99b3e4b514b 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -31,6 +31,7 @@ namespace ErrorCodes extern const int CANNOT_PARSE_QUOTED_STRING; extern const int CANNOT_PARSE_DATETIME; extern const int CANNOT_PARSE_DATE; + extern const int CANNOT_PARSE_UUID; extern const int INCORRECT_DATA; extern const int ATTEMPT_TO_READ_AFTER_EOF; extern const int LOGICAL_ERROR; @@ -51,33 +52,35 @@ UUID parseUUID(std::span src) UUID uuid; const auto * src_ptr = src.data(); auto * dst = reinterpret_cast(&uuid); - if (const auto size = src.size(); size == 36) + const auto size = src.size(); + if (size == 36) { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - parseHex<4>(src_ptr, dst); - parseHex<2>(src_ptr + 9, dst + 4); - parseHex<2>(src_ptr + 14, dst + 6); - parseHex<2>(src_ptr + 19, dst + 8); - parseHex<6>(src_ptr + 24, dst + 10); -#else +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ const std::reverse_iterator dst_it(dst + sizeof(UUID)); - /// FIXME This code looks like trash. 
parseHex<4>(src_ptr, dst + 8); parseHex<2>(src_ptr + 9, dst + 12); parseHex<2>(src_ptr + 14, dst + 14); parseHex<2>(src_ptr + 19, dst); parseHex<6>(src_ptr + 24, dst + 2); +#else + parseHex<4>(src_ptr, dst); + parseHex<2>(src_ptr + 9, dst + 4); + parseHex<2>(src_ptr + 14, dst + 6); + parseHex<2>(src_ptr + 19, dst + 8); + parseHex<6>(src_ptr + 24, dst + 10); #endif } else if (size == 32) { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - parseHex<16>(src_ptr, dst); -#else +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ parseHex<8>(src_ptr, dst + 8); parseHex<8>(src_ptr + 16, dst); +#else + parseHex<16>(src_ptr, dst); #endif } + else + throw Exception(ErrorCodes::CANNOT_PARSE_UUID, "Unexpected length when trying to parse UUID ({})", size); return uuid; } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 7e293944d19..804dab16db9 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -765,7 +765,6 @@ inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf) return readDateTextImpl(date, buf); } -/// If string is not like UUID - implementation specific behaviour. UUID parseUUID(std::span src); template diff --git a/src/IO/WriteHelpers.cpp b/src/IO/WriteHelpers.cpp index 6023d4c9d5b..4f1a95181d4 100644 --- a/src/IO/WriteHelpers.cpp +++ b/src/IO/WriteHelpers.cpp @@ -20,25 +20,12 @@ void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes) } } -/** Function used when byte ordering is important when parsing uuid - * ex: When we create an UUID type - */ std::array formatUUID(const UUID & uuid) { std::array dst; const auto * src_ptr = reinterpret_cast(&uuid); auto * dst_ptr = dst.data(); -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - formatHex(src_ptr, dst_ptr, 4); - dst[8] = '-'; - formatHex(src_ptr + 4, dst_ptr + 9, 2); - dst[13] = '-'; - formatHex(src_ptr + 6, dst_ptr + 14, 2); - dst[18] = '-'; - formatHex(src_ptr + 8, dst_ptr + 19, 2); - dst[23] = '-'; - formatHex(src_ptr + 10, dst_ptr + 24, 6); -#else +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ const std::reverse_iterator src_it(src_ptr + 16); formatHex(src_it + 8, dst_ptr, 4); dst[8] = '-'; @@ -49,6 +36,16 @@ std::array formatUUID(const UUID & uuid) formatHex(src_it, dst_ptr + 19, 2); dst[23] = '-'; formatHex(src_it + 2, dst_ptr + 24, 6); +#else + formatHex(src_ptr, dst_ptr, 4); + dst[8] = '-'; + formatHex(src_ptr + 4, dst_ptr + 9, 2); + dst[13] = '-'; + formatHex(src_ptr + 6, dst_ptr + 14, 2); + dst[18] = '-'; + formatHex(src_ptr + 8, dst_ptr + 19, 2); + dst[23] = '-'; + formatHex(src_ptr + 10, dst_ptr + 24, 6); #endif return dst; diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 923684c4249..056c2ca1b50 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -625,12 +625,15 @@ inline void writeXMLStringForTextElement(std::string_view s, WriteBuffer & buf) writeXMLStringForTextElement(s.data(), s.data() + s.size(), buf); } +/// @brief Serialize `uuid` into an array of characters in big-endian byte order. +/// @param uuid UUID to serialize. +/// @return Array of characters in big-endian byte order. 
std::array formatUUID(const UUID & uuid); inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf) { - const auto text = formatUUID(uuid); - buf.write(text.data(), text.size()); + const auto serialized_uuid = formatUUID(uuid); + buf.write(serialized_uuid.data(), serialized_uuid.size()); } void writeIPv4Text(const IPv4 & ip, WriteBuffer & buf); diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 974b198a483..a4d4e374f4f 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -256,7 +256,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro if (tmp.length() != 36) throw ParsingException(ErrorCodes::CANNOT_PARSE_UUID, "Cannot parse uuid {}", tmp); - const auto uuid = parseUUID({reinterpret_cast(tmp.data()), tmp.length()}); + const UUID uuid = parseUUID({reinterpret_cast(tmp.data()), tmp.length()}); assert_cast(column).insertValue(uuid); return true; }; diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 2b163164d56..f0985e7cffc 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -329,8 +329,8 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder) { const auto & uuid = assert_cast(column).getElement(row_num); - const auto text = formatUUID(uuid); - encoder.encodeBytes(reinterpret_cast(text.data()), text.size()); + const auto serialized_uuid = formatUUID(uuid); + encoder.encodeBytes(reinterpret_cast(serialized_uuid.data()), serialized_uuid.size()); }}; } case TypeIndex::Array: From 0832cb2d7a3bdd2c5e4721c8d8d71a14ce4485ee Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 5 Jun 2023 17:51:12 +0200 Subject: [PATCH 59/64] Update Metadata.h --- src/Interpreters/Cache/Metadata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 4732123fabc..64f91595822 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -22,7 +22,7 @@ struct FileSegmentMetadata : private boost::noncopyable size_t size() const; - bool evicting() const { return !removal_candidate.load(); } + bool evicting() const { return removal_candidate.load(); } Priority::Iterator getQueueIterator() const { return file_segment->getQueueIterator(); } From 654aee209f616aeef350e39cc3c3909862fa14e2 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 5 Jun 2023 11:55:04 -0400 Subject: [PATCH 60/64] add video --- .../mergetree-family/invertedindexes.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md index 31f5a87a2b6..db3d6d0a479 100644 --- a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md @@ -15,6 +15,18 @@ tokenized cells of the string column. For example, the string cell "I will be a " wi", "wil", "ill", "ll ", "l b", " be" etc. The more fine-granular the input strings are tokenized, the bigger but also the more useful the resulting inverted index will be. +
+<!-- embedded video player (markup not preserved) -->
+ :::note Inverted indexes are experimental and should not be used in production environments yet. They may change in the future in backward-incompatible ways, for example with respect to their DDL/DQL syntax or performance/compression characteristics. From 35439a8b06501460fe9162e09eae0fa9b334d1a1 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Mon, 5 Jun 2023 10:47:52 -0700 Subject: [PATCH 61/64] Use reverse iterator for little-endian version --- src/IO/ReadHelpers.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 99b3e4b514b..1bd67e240c9 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -53,15 +53,18 @@ UUID parseUUID(std::span src) const auto * src_ptr = src.data(); auto * dst = reinterpret_cast(&uuid); const auto size = src.size(); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + const std::reverse_iterator dst_it(dst + sizeof(UUID)); +#endif if (size == 36) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - const std::reverse_iterator dst_it(dst + sizeof(UUID)); - parseHex<4>(src_ptr, dst + 8); - parseHex<2>(src_ptr + 9, dst + 12); - parseHex<2>(src_ptr + 14, dst + 14); - parseHex<2>(src_ptr + 19, dst); - parseHex<6>(src_ptr + 24, dst + 2); + parseHex<4>(src_ptr, dst_it + 8); + parseHex<2>(src_ptr + 9, dst_it + 12); + parseHex<2>(src_ptr + 14, dst_it + 14); + parseHex<2>(src_ptr + 19, dst_it); + parseHex<6>(src_ptr + 24, dst_it + 2); #else parseHex<4>(src_ptr, dst); parseHex<2>(src_ptr + 9, dst + 4); @@ -73,8 +76,8 @@ UUID parseUUID(std::span src) else if (size == 32) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - parseHex<8>(src_ptr, dst + 8); - parseHex<8>(src_ptr + 16, dst); + parseHex<8>(src_ptr, dst_it + 8); + parseHex<8>(src_ptr + 16, dst_it); #else parseHex<16>(src_ptr, dst); #endif From 9f80900d6f587383780d2a40f8173093dce68a5a Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 1 Jun 2023 22:02:17 +0000 Subject: [PATCH 62/64] Changes related to an internal feature --- src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 +- src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp | 7 +++++-- src/Disks/ObjectStorages/Cached/CachedObjectStorage.h | 4 +++- src/IO/ReadSettings.h | 2 ++ src/Interpreters/Cache/FileSegment.h | 2 +- src/Interpreters/Cache/IFileCachePriority.h | 1 + src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp | 7 +++++++ src/Storages/MergeTree/DataPartStorageOnDiskBase.h | 1 + src/Storages/MergeTree/IDataPartStorage.h | 1 + .../configs/config.d/storage_conf.xml | 1 + .../configs/config.d/users.xml | 7 +++++++ .../test_replicated_merge_tree_s3_zero_copy/test.py | 8 ++++++-- 13 files changed, 37 insertions(+), 8 deletions(-) create mode 100644 tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/users.xml diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 7c497baa450..877d8ff9bb7 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -1219,7 +1219,7 @@ off_t CachedOnDiskReadBufferFromFile::getPosition() void CachedOnDiskReadBufferFromFile::assertCorrectness() const { - if (!CachedObjectStorage::canUseReadThroughCache() + if (!CachedObjectStorage::canUseReadThroughCache(settings) && !settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache usage 
is not allowed (query_id: {})", query_id); } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 12fbbbcf747..04030fe5f8f 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -36,7 +36,7 @@ ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather( with_cache = settings.remote_fs_cache && settings.enable_filesystem_cache - && (!query_id.empty() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache); + && (!query_id.empty() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache || !settings.avoid_readthrough_cache_outside_query_context); } SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object) diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index 1d24d9d5411..3e73e45638b 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -57,7 +57,7 @@ ReadSettings CachedObjectStorage::patchSettings(const ReadSettings & read_settin ReadSettings modified_settings{read_settings}; modified_settings.remote_fs_cache = cache; - if (!canUseReadThroughCache()) + if (!canUseReadThroughCache(read_settings)) modified_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; return object_storage->patchSettings(modified_settings); @@ -227,8 +227,11 @@ String CachedObjectStorage::getObjectsNamespace() const return object_storage->getObjectsNamespace(); } -bool CachedObjectStorage::canUseReadThroughCache() +bool CachedObjectStorage::canUseReadThroughCache(const ReadSettings & settings) { + if (!settings.avoid_readthrough_cache_outside_query_context) + return true; + return CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() && !CurrentThread::getQueryId().empty(); diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index b5186d39c32..ba9fbd02d94 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -112,7 +112,9 @@ public: WriteSettings getAdjustedSettingsFromMetadataFile(const WriteSettings & settings, const std::string & path) const override; - static bool canUseReadThroughCache(); + const FileCacheSettings & getCacheSettings() const { return cache_settings; } + + static bool canUseReadThroughCache(const ReadSettings & settings); private: FileCache::Key getCacheKey(const std::string & path) const; diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index e43ecd7f275..dae4261e92c 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -99,6 +99,8 @@ struct ReadSettings bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false; bool enable_filesystem_cache_log = false; bool is_file_cache_persistent = false; /// Some files can be made non-evictable. + /// Don't populate cache when the read is not part of query execution (e.g. background thread). 
+ bool avoid_readthrough_cache_outside_query_context = true; size_t filesystem_cache_max_download_size = (128UL * 1024 * 1024 * 1024); bool skip_download_if_exceeds_query_cache = true; diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 163a15fcfda..75395a671f4 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -85,7 +85,7 @@ public: EMPTY, /** * A newly created file segment never has DOWNLOADING state until call to getOrSetDownloader - * because each cache user might acquire multiple file segments and reads them one by one, + * because each cache user might acquire multiple file segments and read them one by one, * so only user which actually needs to read this segment earlier than others - becomes a downloader. */ DOWNLOADING, diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index ad63dcc7ea5..93343398783 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -85,6 +85,7 @@ public: virtual void removeAll(const CacheGuard::Lock &) = 0; + /// From lowest to highest priority. virtual void iterate(IterateFunc && func, const CacheGuard::Lock &) = 0; private: diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index cfc3ff58f81..30776a8bc50 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -202,6 +202,13 @@ bool DataPartStorageOnDiskBase::isStoredOnRemoteDisk() const return volume->getDisk()->isRemote(); } +std::optional DataPartStorageOnDiskBase::getCacheName() const +{ + if (volume->getDisk()->supportsCache()) + return volume->getDisk()->getCacheName(); + return std::nullopt; +} + bool DataPartStorageOnDiskBase::supportZeroCopyReplication() const { return volume->getDisk()->supportZeroCopyReplication(); diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 6b27b7296fc..043953eb20c 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -36,6 +36,7 @@ public: std::string getDiskName() const override; std::string getDiskType() const override; bool isStoredOnRemoteDisk() const override; + std::optional getCacheName() const override; bool supportZeroCopyReplication() const override; bool supportParallelWrite() const override; bool isBroken() const override; diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index f160254350d..933c9bd9958 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -149,6 +149,7 @@ public: virtual std::string getDiskName() const = 0; virtual std::string getDiskType() const = 0; virtual bool isStoredOnRemoteDisk() const { return false; } + virtual std::optional getCacheName() const { return std::nullopt; } virtual bool supportZeroCopyReplication() const { return false; } virtual bool supportParallelWrite() const = 0; virtual bool isBroken() const = 0; diff --git a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml index 15239041478..96d59d5633e 100644 --- a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml +++ 
b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml @@ -12,6 +12,7 @@ s3 100000000 ./cache_s3/ + 1
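[Editor's note] The cache-bypass condition introduced by this patch (in ReadBufferFromRemoteFSGather and CachedObjectStorage::canUseReadThroughCache) can be hard to follow when split across several hunks. The following is a minimal, hypothetical C++ sketch of that gating logic only; ReadSettingsSketch is an invented reduction of ReadSettings to the fields involved, and the remote_fs_cache pointer check is deliberately omitted.

#include <iostream>
#include <string>

/// Hypothetical, reduced version of ReadSettings with only the fields used here.
struct ReadSettingsSketch
{
    bool enable_filesystem_cache = true;
    bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false;
    /// New flag from the patch: by default, reads outside of a query context
    /// (e.g. background threads) must not populate (read through) the cache.
    bool avoid_readthrough_cache_outside_query_context = true;
};

/// Mirrors the condition from the patch: the cache may be used when the read belongs
/// to a query (non-empty query_id), or when one of the two opt-outs is set.
bool mayUseFilesystemCache(const ReadSettingsSketch & settings, const std::string & query_id)
{
    return settings.enable_filesystem_cache
        && (!query_id.empty()
            || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache
            || !settings.avoid_readthrough_cache_outside_query_context);
}

int main()
{
    ReadSettingsSketch settings;
    /// With the defaults above: a background read (no query_id) bypasses the cache,
    /// while a read issued by a query may use it.
    std::cout << mayUseFilesystemCache(settings, "") << " "
              << mayUseFilesystemCache(settings, "query-1") << "\n";
    return 0;
}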
diff --git a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/users.xml b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/users.xml new file mode 100644 index 00000000000..5de169edc1e --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/users.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py index eca18820016..72a01d278d8 100644 --- a/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py +++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py @@ -19,6 +19,7 @@ def cluster(): cluster.add_instance( "node1", main_configs=["configs/config.d/storage_conf.xml"], + user_configs=["configs/config.d/users.xml"], macros={"replica": "1"}, with_minio=True, with_zookeeper=True, @@ -26,12 +27,14 @@ def cluster(): cluster.add_instance( "node2", main_configs=["configs/config.d/storage_conf.xml"], + user_configs=["configs/config.d/users.xml"], macros={"replica": "2"}, with_zookeeper=True, ) cluster.add_instance( "node3", main_configs=["configs/config.d/storage_conf.xml"], + user_configs=["configs/config.d/users.xml"], macros={"replica": "3"}, with_zookeeper=True, ) @@ -74,7 +77,7 @@ def generate_values(date_str, count, sign=1): def create_table(cluster, additional_settings=None): create_table_statement = """ - CREATE TABLE s3_test ON CLUSTER cluster( + CREATE TABLE s3_test ON CLUSTER cluster ( dt Date, id Int64, data String, @@ -95,7 +98,8 @@ def create_table(cluster, additional_settings=None): def drop_table(cluster): yield for node in list(cluster.instances.values()): - node.query("DROP TABLE IF EXISTS s3_test") + node.query("DROP TABLE IF EXISTS s3_test SYNC") + node.query("DROP TABLE IF EXISTS test_drop_table SYNC") minio = cluster.minio_client # Remove extra objects to prevent tests cascade failing From e87348010d3e77d60b8ccd85e7bd4574bec9600b Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 6 Jun 2023 14:42:56 +0200 Subject: [PATCH 63/64] Rework loading and removing of data parts for MergeTree tables. 
(#49474) Co-authored-by: Sergei Trifonov --- programs/local/LocalServer.cpp | 24 +- programs/server/Server.cpp | 53 +++- src/Backups/BackupIO_S3.cpp | 8 +- src/Common/CurrentMetrics.cpp | 2 + src/Core/ServerSettings.h | 4 +- src/Formats/FormatFactory.cpp | 2 +- src/IO/SharedThreadPools.cpp | 151 ++++++---- src/IO/SharedThreadPools.h | 74 +++-- src/Interpreters/threadPoolCallbackRunner.h | 3 + src/Storages/MergeTree/MergeTreeData.cpp | 284 +++++++----------- src/Storages/MergeTree/MergeTreeData.h | 6 +- src/Storages/MergeTree/MergeTreeSettings.h | 5 +- src/Storages/MergeTree/MergeTreeSource.cpp | 2 +- src/Storages/StorageS3.cpp | 2 +- .../System/StorageSystemDetachedParts.cpp | 2 +- .../00988_parallel_parts_removal.sql | 2 +- .../00989_parallel_parts_loading.sql | 2 +- .../01810_max_part_removal_threads_long.sh | 11 +- .../02432_s3_parallel_parts_cleanup.sql | 5 +- 19 files changed, 352 insertions(+), 290 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 96c1ca261b5..caca7cfb50d 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -130,15 +130,31 @@ void LocalServer::initialize(Poco::Util::Application & self) }); #endif - IOThreadPool::initialize( + getIOThreadPool().initialize( config().getUInt("max_io_thread_pool_size", 100), config().getUInt("max_io_thread_pool_free_size", 0), config().getUInt("io_thread_pool_queue_size", 10000)); - OutdatedPartsLoadingThreadPool::initialize( - config().getUInt("max_outdated_parts_loading_thread_pool_size", 16), + + const size_t active_parts_loading_threads = config().getUInt("max_active_parts_loading_thread_pool_size", 64); + getActivePartsLoadingThreadPool().initialize( + active_parts_loading_threads, 0, // We don't need any threads one all the parts will be loaded - config().getUInt("max_outdated_parts_loading_thread_pool_size", 16)); + active_parts_loading_threads); + + const size_t outdated_parts_loading_threads = config().getUInt("max_outdated_parts_loading_thread_pool_size", 32); + getOutdatedPartsLoadingThreadPool().initialize( + outdated_parts_loading_threads, + 0, // We don't need any threads one all the parts will be loaded + outdated_parts_loading_threads); + + getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads); + + const size_t cleanup_threads = config().getUInt("max_parts_cleaning_thread_pool_size", 128); + getPartsCleaningThreadPool().initialize( + cleanup_threads, + 0, // We don't need any threads one all the parts will be deleted + cleanup_threads); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 9eb3e6c9ebc..d0fc8aca5e8 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -683,21 +683,36 @@ try }); #endif - IOThreadPool::initialize( + getIOThreadPool().initialize( server_settings.max_io_thread_pool_size, server_settings.max_io_thread_pool_free_size, server_settings.io_thread_pool_queue_size); - BackupsIOThreadPool::initialize( + getBackupsIOThreadPool().initialize( server_settings.max_backups_io_thread_pool_size, server_settings.max_backups_io_thread_pool_free_size, server_settings.backups_io_thread_pool_queue_size); - OutdatedPartsLoadingThreadPool::initialize( + getActivePartsLoadingThreadPool().initialize( + server_settings.max_active_parts_loading_thread_pool_size, + 0, // We don't need any threads once all the parts will be loaded + server_settings.max_active_parts_loading_thread_pool_size); + + getOutdatedPartsLoadingThreadPool().initialize( 
server_settings.max_outdated_parts_loading_thread_pool_size, - 0, // We don't need any threads one all the parts will be loaded + 0, // We don't need any threads once all the parts will be loaded server_settings.max_outdated_parts_loading_thread_pool_size); + /// It could grow if we need to synchronously wait until all the data parts will be loaded. + getOutdatedPartsLoadingThreadPool().setMaxTurboThreads( + server_settings.max_active_parts_loading_thread_pool_size + ); + + getPartsCleaningThreadPool().initialize( + server_settings.max_parts_cleaning_thread_pool_size, + 0, // We don't need any threads one all the parts will be deleted + server_settings.max_parts_cleaning_thread_pool_size); + /// Initialize global local cache for remote filesystem. if (config().has("local_cache_for_remote_fs")) { @@ -1226,6 +1241,36 @@ try global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings_.background_message_broker_schedule_pool_size); global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings_.background_distributed_schedule_pool_size); + getIOThreadPool().reloadConfiguration( + server_settings.max_io_thread_pool_size, + server_settings.max_io_thread_pool_free_size, + server_settings.io_thread_pool_queue_size); + + getBackupsIOThreadPool().reloadConfiguration( + server_settings.max_backups_io_thread_pool_size, + server_settings.max_backups_io_thread_pool_free_size, + server_settings.backups_io_thread_pool_queue_size); + + getActivePartsLoadingThreadPool().reloadConfiguration( + server_settings.max_active_parts_loading_thread_pool_size, + 0, // We don't need any threads once all the parts will be loaded + server_settings.max_active_parts_loading_thread_pool_size); + + getOutdatedPartsLoadingThreadPool().reloadConfiguration( + server_settings.max_outdated_parts_loading_thread_pool_size, + 0, // We don't need any threads once all the parts will be loaded + server_settings.max_outdated_parts_loading_thread_pool_size); + + /// It could grow if we need to synchronously wait until all the data parts will be loaded. + getOutdatedPartsLoadingThreadPool().setMaxTurboThreads( + server_settings.max_active_parts_loading_thread_pool_size + ); + + getPartsCleaningThreadPool().reloadConfiguration( + server_settings.max_parts_cleaning_thread_pool_size, + 0, // We don't need any threads one all the parts will be deleted + server_settings.max_parts_cleaning_thread_pool_size); + if (config->has("resources")) { global_context->getResourceManager()->updateConfiguration(*config); diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index f1fd276e34b..967beba4bf5 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -161,7 +161,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s /* dest_key= */ blob_path[0], request_settings, object_attributes, - threadPoolCallbackRunner(BackupsIOThreadPool::get(), "BackupReaderS3"), + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupReaderS3"), /* for_disk_s3= */ true); return file_size; @@ -212,7 +212,7 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src fs::path(s3_uri.key) / path_in_backup, request_settings, {}, - threadPoolCallbackRunner(BackupsIOThreadPool::get(), "BackupWriterS3")); + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); return; /// copied! 
} } @@ -224,7 +224,7 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src void BackupWriterS3::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { copyDataToS3File(create_read_buffer, start_pos, length, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup, request_settings, {}, - threadPoolCallbackRunner(BackupsIOThreadPool::get(), "BackupWriterS3")); + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); } BackupWriterS3::~BackupWriterS3() = default; @@ -258,7 +258,7 @@ std::unique_ptr BackupWriterS3::writeFile(const String & file_name) DBMS_DEFAULT_BUFFER_SIZE, request_settings, std::nullopt, - threadPoolCallbackRunner(BackupsIOThreadPool::get(), "BackupWriterS3"), + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3"), write_settings); } diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 956487a300e..61725d079bf 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -137,6 +137,8 @@ M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \ M(MergeTreePartsLoaderThreads, "Number of threads in the MergeTree parts loader thread pool.") \ M(MergeTreePartsLoaderThreadsActive, "Number of threads in the MergeTree parts loader thread pool running a task.") \ + M(MergeTreeOutdatedPartsLoaderThreads, "Number of threads in the threadpool for loading Outdated data parts.") \ + M(MergeTreeOutdatedPartsLoaderThreadsActive, "Number of active threads in the threadpool for loading Outdated data parts.") \ M(MergeTreePartsCleanerThreads, "Number of threads in the MergeTree parts cleaner thread pool.") \ M(MergeTreePartsCleanerThreadsActive, "Number of threads in the MergeTree parts cleaner thread pool running a task.") \ M(SystemReplicasThreads, "Number of threads in the system.replicas thread pool.") \ diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index cb43d62ecd1..1a9f226041b 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -21,7 +21,9 @@ namespace DB M(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \ M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \ M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \ - M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The maximum number of threads that would be used for loading outdated data parts on startup", 0) \ + M(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \ + M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \ + M(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \ M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \ M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. 
Zero means unlimited.", 0) \ M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 6f2974c49c6..39b28e025a6 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -364,7 +364,7 @@ std::unique_ptr FormatFactory::wrapReadBufferIfNeeded( settings.max_download_buffer_size); res = wrapInParallelReadBufferIfSupported( - buf, threadPoolCallbackRunner(IOThreadPool::get(), "ParallelRead"), + buf, threadPoolCallbackRunner(getIOThreadPool().get(), "ParallelRead"), max_download_threads, settings.max_download_buffer_size, file_size); } diff --git a/src/IO/SharedThreadPools.cpp b/src/IO/SharedThreadPools.cpp index b7b6aea1567..6a0e953f0ef 100644 --- a/src/IO/SharedThreadPools.cpp +++ b/src/IO/SharedThreadPools.cpp @@ -9,8 +9,12 @@ namespace CurrentMetrics extern const Metric IOThreadsActive; extern const Metric BackupsIOThreads; extern const Metric BackupsIOThreadsActive; - extern const Metric OutdatedPartsLoadingThreads; - extern const Metric OutdatedPartsLoadingThreadsActive; + extern const Metric MergeTreePartsLoaderThreads; + extern const Metric MergeTreePartsLoaderThreadsActive; + extern const Metric MergeTreePartsCleanerThreads; + extern const Metric MergeTreePartsCleanerThreadsActive; + extern const Metric MergeTreeOutdatedPartsLoaderThreads; + extern const Metric MergeTreeOutdatedPartsLoaderThreadsActive; } namespace DB @@ -21,88 +25,117 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -std::unique_ptr IOThreadPool::instance; -void IOThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size) +StaticThreadPool::StaticThreadPool( + const String & name_, + CurrentMetrics::Metric threads_metric_, + CurrentMetrics::Metric threads_active_metric_) + : name(name_) + , threads_metric(threads_metric_) + , threads_active_metric(threads_active_metric_) +{ +} + +void StaticThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size) { if (instance) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is initialized twice"); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "The {} is initialized twice", name); + /// By default enabling "turbo mode" won't affect the number of threads anyhow + max_threads_turbo = max_threads; + max_threads_normal = max_threads; instance = std::make_unique( - CurrentMetrics::IOThreads, - CurrentMetrics::IOThreadsActive, + threads_metric, + threads_active_metric, max_threads, max_free_threads, queue_size, /* shutdown_on_exception= */ false); } -ThreadPool & IOThreadPool::get() +void StaticThreadPool::reloadConfiguration(size_t max_threads, size_t max_free_threads, size_t queue_size) { if (!instance) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is not initialized"); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "The {} is not initialized", name); + + instance->setMaxThreads(turbo_mode_enabled > 0 ? 
max_threads_turbo : max_threads); + instance->setMaxFreeThreads(max_free_threads); + instance->setQueueSize(queue_size); +} + + +ThreadPool & StaticThreadPool::get() +{ + if (!instance) + throw Exception(ErrorCodes::LOGICAL_ERROR, "The {} is not initialized", name); return *instance; } -std::unique_ptr BackupsIOThreadPool::instance; - -void BackupsIOThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size) -{ - if (instance) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The BackupsIO thread pool is initialized twice"); - } - - instance = std::make_unique( - CurrentMetrics::BackupsIOThreads, - CurrentMetrics::BackupsIOThreadsActive, - max_threads, - max_free_threads, - queue_size, - /* shutdown_on_exception= */ false); -} - -ThreadPool & BackupsIOThreadPool::get() +void StaticThreadPool::enableTurboMode() { if (!instance) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The BackupsIO thread pool is not initialized"); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "The {} is not initialized", name); - return *instance; + std::lock_guard lock(mutex); + + ++turbo_mode_enabled; + if (turbo_mode_enabled == 1) + instance->setMaxThreads(max_threads_turbo); } -std::unique_ptr OutdatedPartsLoadingThreadPool::instance; - -void OutdatedPartsLoadingThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size) -{ - if (instance) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The PartsLoadingThreadPool thread pool is initialized twice"); - } - - instance = std::make_unique( - CurrentMetrics::OutdatedPartsLoadingThreads, - CurrentMetrics::OutdatedPartsLoadingThreadsActive, - max_threads, - max_free_threads, - queue_size, - /* shutdown_on_exception= */ false); -} - -ThreadPool & OutdatedPartsLoadingThreadPool::get() +void StaticThreadPool::disableTurboMode() { if (!instance) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The PartsLoadingThreadPool thread pool is not initialized"); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "The {} is not initialized", name); - return *instance; + std::lock_guard lock(mutex); + + --turbo_mode_enabled; + if (turbo_mode_enabled == 0) + instance->setMaxThreads(max_threads_normal); +} + +void StaticThreadPool::setMaxTurboThreads(size_t max_threads_turbo_) +{ + if (!instance) + throw Exception(ErrorCodes::LOGICAL_ERROR, "The {} is not initialized", name); + + std::lock_guard lock(mutex); + + max_threads_turbo = max_threads_turbo_; + if (turbo_mode_enabled > 0) + instance->setMaxThreads(max_threads_turbo); +} + +StaticThreadPool & getIOThreadPool() +{ + static StaticThreadPool instance("IOThreadPool", CurrentMetrics::IOThreads, CurrentMetrics::IOThreadsActive); + return instance; +} + +StaticThreadPool & getBackupsIOThreadPool() +{ + static StaticThreadPool instance("BackupsIOThreadPool", CurrentMetrics::BackupsIOThreads, CurrentMetrics::BackupsIOThreadsActive); + return instance; +} + +StaticThreadPool & getActivePartsLoadingThreadPool() +{ + static StaticThreadPool instance("MergeTreePartsLoaderThreadPool", CurrentMetrics::MergeTreePartsLoaderThreads, CurrentMetrics::MergeTreePartsLoaderThreadsActive); + return instance; +} + +StaticThreadPool & getPartsCleaningThreadPool() +{ + static StaticThreadPool instance("MergeTreePartsCleanerThreadPool", CurrentMetrics::MergeTreePartsCleanerThreads, CurrentMetrics::MergeTreePartsCleanerThreadsActive); + return instance; +} + +StaticThreadPool & getOutdatedPartsLoadingThreadPool() +{ + static StaticThreadPool instance("MergeTreeOutdatedPartsLoaderThreadPool", 
CurrentMetrics::MergeTreeOutdatedPartsLoaderThreads, CurrentMetrics::MergeTreeOutdatedPartsLoaderThreadsActive); + return instance; } } diff --git a/src/IO/SharedThreadPools.h b/src/IO/SharedThreadPools.h index 1b43dfe778c..188a2a4f003 100644 --- a/src/IO/SharedThreadPools.h +++ b/src/IO/SharedThreadPools.h @@ -1,48 +1,64 @@ #pragma once +#include #include +#include + #include #include +#include namespace DB { -/* - * ThreadPool used for the IO. - */ -class IOThreadPool +class StaticThreadPool { - static std::unique_ptr instance; - public: - static void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size); - static ThreadPool & get(); + StaticThreadPool( + const String & name_, + CurrentMetrics::Metric threads_metric_, + CurrentMetrics::Metric threads_active_metric_); + + ThreadPool & get(); + + void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size); + void reloadConfiguration(size_t max_threads, size_t max_free_threads, size_t queue_size); + + /// At runtime we can increase the number of threads up the specified limit + /// This is needed to utilize as much a possible resources to accomplish some task. + void setMaxTurboThreads(size_t max_threads_turbo_); + void enableTurboMode(); + void disableTurboMode(); + +private: + const String name; + const CurrentMetrics::Metric threads_metric; + const CurrentMetrics::Metric threads_active_metric; + + std::unique_ptr instance; + std::mutex mutex; + size_t max_threads_turbo = 0; + size_t max_threads_normal = 0; + /// If this counter is > 0 - this specific mode is enabled + size_t turbo_mode_enabled = 0; }; +/// ThreadPool used for the IO. +StaticThreadPool & getIOThreadPool(); -/* - * ThreadPool used for the Backup IO. - */ -class BackupsIOThreadPool -{ - static std::unique_ptr instance; +/// ThreadPool used for the Backup IO. +StaticThreadPool & getBackupsIOThreadPool(); -public: - static void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size); - static ThreadPool & get(); -}; +/// ThreadPool used for the loading of Outdated data parts for MergeTree tables. +StaticThreadPool & getActivePartsLoadingThreadPool(); +/// ThreadPool used for deleting data parts for MergeTree tables. +StaticThreadPool & getPartsCleaningThreadPool(); -/* - * ThreadPool used for the loading of Outdated data parts for MergeTree tables. - */ -class OutdatedPartsLoadingThreadPool -{ - static std::unique_ptr instance; - -public: - static void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size); - static ThreadPool & get(); -}; +/// This ThreadPool is used for the loading of Outdated data parts for MergeTree tables. +/// Normally we will just load Outdated data parts concurrently in background, but in +/// case when we need to synchronously wait for the loading to be finished, we can increase +/// the number of threads by calling enableTurboMode() :-) +StaticThreadPool & getOutdatedPartsLoadingThreadPool(); } diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h index f7324bfafe6..eb90b61cf31 100644 --- a/src/Interpreters/threadPoolCallbackRunner.h +++ b/src/Interpreters/threadPoolCallbackRunner.h @@ -44,6 +44,9 @@ ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & auto future = task->get_future(); + /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". 
+ /// Note: calling method scheduleOrThrowOnError in intentional, because we don't want to throw exceptions + /// in critical places where this callback runner is used (e.g. loading or deletion of parts) my_pool->scheduleOrThrowOnError([my_task = std::move(task)]{ (*my_task)(); }, priority); return future; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 32665429051..e806e1bb93f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -130,10 +130,6 @@ namespace ProfileEvents namespace CurrentMetrics { extern const Metric DelayedInserts; - extern const Metric MergeTreePartsLoaderThreads; - extern const Metric MergeTreePartsLoaderThreadsActive; - extern const Metric MergeTreePartsCleanerThreads; - extern const Metric MergeTreePartsCleanerThreadsActive; } @@ -1425,71 +1421,17 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPartWithRetries( UNREACHABLE(); } -std::vector MergeTreeData::loadDataPartsFromDisk( - ThreadPool & pool, - size_t num_parts, - std::queue & parts_queue, - const MergeTreeSettingsPtr & settings) +std::vector MergeTreeData::loadDataPartsFromDisk(PartLoadingTreeNodes & parts_to_load) { - /// Parallel loading of data parts. - pool.setMaxThreads(std::min(static_cast(settings->max_part_loading_threads), num_parts)); - size_t num_threads = pool.getMaxThreads(); - LOG_DEBUG(log, "Going to use {} threads to load parts", num_threads); + const size_t num_parts = parts_to_load.size(); - std::vector parts_per_thread(num_threads, num_parts / num_threads); - for (size_t i = 0ul; i < num_parts % num_threads; ++i) - ++parts_per_thread[i]; + LOG_DEBUG(log, "Will load {} number of parts using {} threads", num_parts, getActivePartsLoadingThreadPool().get().getMaxThreads()); - /// Prepare data parts for parallel loading. Threads will focus on given disk first, then steal - /// others' tasks when finish current disk part loading process. - std::vector threads_parts(num_threads); - std::set remaining_thread_parts; - std::queue threads_queue; + /// Shuffle all the parts randomly to possible speed up loading them from JBOD. + std::shuffle(parts_to_load.begin(), parts_to_load.end(), thread_local_rng); - for (size_t i = 0; i < num_threads; ++i) - { - remaining_thread_parts.insert(i); - threads_queue.push(i); - } - - while (!parts_queue.empty()) - { - assert(!threads_queue.empty()); - size_t i = threads_queue.front(); - auto & need_parts = parts_per_thread[i]; - assert(need_parts > 0); - - auto & thread_parts = threads_parts[i]; - auto & current_parts = parts_queue.front(); - assert(!current_parts.empty()); - - auto parts_to_grab = std::min(need_parts, current_parts.size()); - thread_parts.insert(thread_parts.end(), current_parts.end() - parts_to_grab, current_parts.end()); - current_parts.resize(current_parts.size() - parts_to_grab); - need_parts -= parts_to_grab; - - /// Before processing next thread, change disk if possible. - /// Different threads will likely start loading parts from different disk, - /// which may improve read parallelism for JBOD. - - /// If current disk still has some parts, push it to the tail. - if (!current_parts.empty()) - parts_queue.push(std::move(current_parts)); - - parts_queue.pop(); - - /// If current thread still want some parts, push it to the tail. 
- if (need_parts > 0) - threads_queue.push(i); - - threads_queue.pop(); - } - - assert(threads_queue.empty()); - assert(std::all_of(threads_parts.begin(), threads_parts.end(), [](const auto & parts) - { - return !parts.empty(); - })); + auto runner = threadPoolCallbackRunner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); + std::vector> parts_futures; std::mutex part_select_mutex; std::mutex part_loading_mutex; @@ -1498,81 +1440,77 @@ std::vector MergeTreeData::loadDataPartsFromDisk( try { - for (size_t thread = 0; thread < num_threads; ++thread) + while (true) { - pool.scheduleOrThrowOnError([&, thread, thread_group = CurrentThread::getGroup()] + bool are_parts_to_load_empty = false; { - SCOPE_EXIT_SAFE( - if (thread_group) - CurrentThread::detachFromGroupIfNotDetached(); - ); - if (thread_group) - CurrentThread::attachToGroupIfDetached(thread_group); + std::lock_guard lock(part_select_mutex); + are_parts_to_load_empty = parts_to_load.empty(); + } - while (true) + if (are_parts_to_load_empty) + { + /// Wait for all scheduled tasks. + /// We have to use .get() method to rethrow any exception that could occur. + for (auto & future: parts_futures) + future.get(); + parts_futures.clear(); + /// At this point it is possible, that some other parts appeared in the queue for processing (parts_to_load), + /// because we added them from inside the pool. + /// So we need to recheck it. + } + + PartLoadingTree::NodePtr current_part; + { + std::lock_guard lock(part_select_mutex); + if (parts_to_load.empty()) + break; + + current_part = parts_to_load.back(); + parts_to_load.pop_back(); + } + + parts_futures.push_back(runner( + [&, part = std::move(current_part)]() { - PartLoadingTree::NodePtr thread_part; - size_t thread_idx = thread; - - { - std::lock_guard lock{part_select_mutex}; - - if (remaining_thread_parts.empty()) - return; - - /// Steal task if nothing to do - if (threads_parts[thread].empty()) - { - // Try random steal tasks from the next thread - std::uniform_int_distribution distribution(0, remaining_thread_parts.size() - 1); - auto it = remaining_thread_parts.begin(); - std::advance(it, distribution(thread_local_rng)); - thread_idx = *it; - } - - auto & thread_parts = threads_parts[thread_idx]; - thread_part = thread_parts.back(); - thread_parts.pop_back(); - if (thread_parts.empty()) - remaining_thread_parts.erase(thread_idx); - } - /// Pass a separate mutex to guard the set of parts, because this lambda /// is called concurrently but with already locked @data_parts_mutex. auto res = loadDataPartWithRetries( - thread_part->info, thread_part->name, thread_part->disk, + part->info, part->name, part->disk, DataPartState::Active, part_loading_mutex, loading_parts_initial_backoff_ms, loading_parts_max_backoff_ms, loading_parts_max_tries); - thread_part->is_loaded = true; + part->is_loaded = true; bool is_active_part = res.part->getState() == DataPartState::Active; /// If part is broken or duplicate or should be removed according to transaction /// and it has any covered parts then try to load them to replace this part. 
- if (!is_active_part && !thread_part->children.empty()) + if (!is_active_part && !part->children.empty()) { std::lock_guard lock{part_select_mutex}; - for (const auto & [_, node] : thread_part->children) - threads_parts[thread].push_back(node); - remaining_thread_parts.insert(thread); + for (const auto & [_, node] : part->children) + parts_to_load.push_back(node); } { std::lock_guard lock(part_loading_mutex); loaded_parts.push_back(std::move(res)); } - } - }); + }, Priority{0})); } } catch (...) { - /// If this is not done, then in case of an exception, tasks will be destroyed before the threads are completed, and it will be bad. - pool.wait(); + /// Wait for all scheduled tasks + /// A future becomes invalid after .get() call + /// + .wait() method is used not to throw any exception here. + for (auto & future: parts_futures) + if (future.valid()) + future.wait(); + throw; } - pool.wait(); return loaded_parts; } @@ -1679,9 +1617,12 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) } } - ThreadPool pool(CurrentMetrics::MergeTreePartsLoaderThreads, CurrentMetrics::MergeTreePartsLoaderThreadsActive, disks.size()); + auto runner = threadPoolCallbackRunner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); std::vector parts_to_load_by_disk(disks.size()); + std::vector> disks_futures; + disks_futures.reserve(disks.size()); + for (size_t i = 0; i < disks.size(); ++i) { const auto & disk_ptr = disks[i]; @@ -1690,7 +1631,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) auto & disk_parts = parts_to_load_by_disk[i]; - pool.scheduleOrThrowOnError([&, disk_ptr]() + disks_futures.push_back(runner([&, disk_ptr]() { for (auto it = disk_ptr->iterateDirectory(relative_data_path); it->isValid(); it->next()) { @@ -1703,38 +1644,31 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (auto part_info = MergeTreePartInfo::tryParsePartName(it->name(), format_version)) disk_parts.emplace_back(*part_info, it->name(), disk_ptr); } - }); + }, Priority{0})); } - pool.wait(); + /// For iteration to be completed + /// Any exception will be re-thrown. + for (auto & future : disks_futures) + future.get(); + disks_futures.clear(); PartLoadingTree::PartLoadingInfos parts_to_load; for (auto & disk_parts : parts_to_load_by_disk) std::move(disk_parts.begin(), disk_parts.end(), std::back_inserter(parts_to_load)); auto loading_tree = PartLoadingTree::build(std::move(parts_to_load)); - /// Collect parts by disks' names. - std::map disk_part_map; + + size_t num_parts = 0; + PartLoadingTreeNodes active_parts; /// Collect only "the most covering" parts from the top level of the tree. 
loading_tree.traverse(/*recursive=*/ false, [&](const auto & node) { - disk_part_map[node->disk->getName()].emplace_back(node); + active_parts.emplace_back(node); }); - size_t num_parts = 0; - std::queue parts_queue; - - for (auto & [disk_name, disk_parts] : disk_part_map) - { - LOG_INFO(log, "Found {} parts for disk '{}' to load", disk_parts.size(), disk_name); - - if (disk_parts.empty()) - continue; - - num_parts += disk_parts.size(); - parts_queue.push(std::move(disk_parts)); - } + num_parts += active_parts.size(); auto part_lock = lockParts(); LOG_TEST(log, "loadDataParts: clearing data_parts_indexes (had {} parts)", data_parts_indexes.size()); @@ -1754,7 +1688,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (num_parts > 0) { - auto loaded_parts = loadDataPartsFromDisk(pool, num_parts, parts_queue, settings); + auto loaded_parts = loadDataPartsFromDisk(active_parts); for (const auto & res : loaded_parts) { @@ -1783,10 +1717,12 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (settings->in_memory_parts_enable_wal) { - pool.setMaxThreads(disks.size()); std::vector disks_wal_parts(disks.size()); std::mutex wal_init_lock; + std::vector> wal_disks_futures; + wal_disks_futures.reserve(disks.size()); + for (size_t i = 0; i < disks.size(); ++i) { const auto & disk_ptr = disks[i]; @@ -1795,7 +1731,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) auto & disk_wal_parts = disks_wal_parts[i]; - pool.scheduleOrThrowOnError([&, disk_ptr]() + wal_disks_futures.push_back(runner([&, disk_ptr]() { for (auto it = disk_ptr->iterateDirectory(relative_data_path); it->isValid(); it->next()) { @@ -1821,10 +1757,14 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) disk_wal_parts.push_back(std::move(part)); } } - }); + }, Priority{0})); } - pool.wait(); + /// For for iteration to be completed + /// Any exception will be re-thrown. + for (auto & future : wal_disks_futures) + future.get(); + wal_disks_futures.clear(); MutableDataPartsVector parts_from_wal; for (auto & disk_wal_parts : disks_wal_parts) @@ -1925,7 +1865,7 @@ try std::atomic_size_t num_loaded_parts = 0; - auto runner = threadPoolCallbackRunner(OutdatedPartsLoadingThreadPool::get(), "OutdatedParts"); + auto runner = threadPoolCallbackRunner(getOutdatedPartsLoadingThreadPool().get(), "OutdatedParts"); std::vector> parts_futures; while (true) @@ -1938,8 +1878,10 @@ try if (is_async && outdated_data_parts_loading_canceled) { /// Wait for every scheduled task + /// In case of any exception it will be re-thrown and server will be terminated. for (auto & future : parts_futures) - future.wait(); + future.get(); + parts_futures.clear(); LOG_DEBUG(log, "Stopped loading outdated data parts because task was canceled. " @@ -1973,7 +1915,7 @@ try /// Wait for every scheduled task for (auto & future : parts_futures) - future.wait(); + future.get(); LOG_DEBUG(log, "Loaded {} outdated data parts {}", num_loaded_parts, is_async ? "asynchronously" : "synchronously"); @@ -1999,6 +1941,13 @@ void MergeTreeData::waitForOutdatedPartsToBeLoaded() const TSA_NO_THREAD_SAFETY_ if (isStaticStorage()) return; + /// We need to load parts as fast as possible + getOutdatedPartsLoadingThreadPool().enableTurboMode(); + SCOPE_EXIT({ + /// Let's lower the number of threads e.g. 
for later ATTACH queries to behave as usual + getOutdatedPartsLoadingThreadPool().disableTurboMode(); + }); + LOG_TRACE(log, "Will wait for outdated data parts to be loaded"); std::unique_lock lock(outdated_data_parts_mutex); @@ -2420,20 +2369,15 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t } }; - if (settings->max_part_removal_threads <= 1 || parts_to_remove.size() <= settings->concurrent_part_removal_threshold) + if (parts_to_remove.size() <= settings->concurrent_part_removal_threshold) { remove_single_thread(); return; } /// Parallel parts removal. - size_t num_threads = settings->max_part_removal_threads; - if (!num_threads) - num_threads = getNumberOfPhysicalCPUCores() * 2; - num_threads = std::min(num_threads, parts_to_remove.size()); std::mutex part_names_mutex; - ThreadPool pool(CurrentMetrics::MergeTreePartsCleanerThreads, CurrentMetrics::MergeTreePartsCleanerThreadsActive, - num_threads, num_threads, /* unlimited queue size */ 0); + auto runner = threadPoolCallbackRunner(getPartsCleaningThreadPool().get(), "PartsCleaning"); /// This flag disallow straightforward concurrent parts removal. It's required only in case /// when we have parts on zero-copy disk + at least some of them were mutated. @@ -2453,27 +2397,27 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t LOG_DEBUG( log, "Removing {} parts from filesystem (concurrently): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); + std::vector> parts_to_remove_futures; + parts_to_remove_futures.reserve(parts_to_remove.size()); + for (const DataPartPtr & part : parts_to_remove) { - pool.scheduleOrThrowOnError([&part, &part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup()] + parts_to_remove_futures.push_back(runner([&part, &part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup()] { - SCOPE_EXIT_SAFE( - if (thread_group) - CurrentThread::detachFromGroupIfNotDetached(); - ); - if (thread_group) - CurrentThread::attachToGroupIfDetached(thread_group); - asMutableDeletingPart(part)->remove(); if (part_names_succeed) { std::lock_guard lock(part_names_mutex); part_names_succeed->insert(part->name); } - }); + }, Priority{0})); } - pool.wait(); + /// Any exception will be re-thrown. 
+ for (auto & future : parts_to_remove_futures) + future.get(); + parts_to_remove_futures.clear(); + return; } @@ -2544,20 +2488,15 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t return independent_ranges; }; - auto schedule_parts_removal = [this, &pool, &part_names_mutex, part_names_succeed]( + std::vector> part_removal_futures; + + auto schedule_parts_removal = [this, &runner, &part_names_mutex, part_names_succeed, &part_removal_futures]( const MergeTreePartInfo & range, DataPartsVector && parts_in_range) { /// Below, range should be captured by copy to avoid use-after-scope on exception from pool - pool.scheduleOrThrowOnError( - [this, range, &part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup(), batch = std::move(parts_in_range)] + part_removal_futures.push_back(runner( + [this, range, &part_names_mutex, part_names_succeed, batch = std::move(parts_in_range)] { - SCOPE_EXIT_SAFE( - if (thread_group) - CurrentThread::detachFromGroupIfNotDetached(); - ); - if (thread_group) - CurrentThread::attachToGroupIfDetached(thread_group); - LOG_TRACE(log, "Removing {} parts in blocks range {}", batch.size(), range.getPartNameForLogs()); for (const auto & part : batch) @@ -2569,7 +2508,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t part_names_succeed->insert(part->name); } } - }); + }, Priority{0})); }; RemovalRanges independent_ranges = split_into_independent_ranges(parts_to_remove, /* split_times */ 0); @@ -2632,7 +2571,11 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t LOG_TRACE(log, "Will remove {} big parts separately: {}", excluded_parts.size(), fmt::join(excluded_parts, ", ")); independent_ranges = split_into_independent_ranges(excluded_parts, /* split_times */ 0); - pool.wait(); + + /// Any exception will be re-thrown. + for (auto & future : part_removal_futures) + future.get(); + part_removal_futures.clear(); for (size_t i = 0; i < independent_ranges.infos.size(); ++i) { @@ -2641,7 +2584,10 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t schedule_parts_removal(range, std::move(parts_in_range)); } - pool.wait(); + /// Any exception will be re-thrown. 
+ for (auto & future : part_removal_futures) + future.get(); + part_removal_futures.clear(); if (parts_to_remove.size() != sum_of_ranges + excluded_parts.size()) throw Exception(ErrorCodes::LOGICAL_ERROR, diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 1c41de6fa19..2f254f9a787 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1519,11 +1519,7 @@ private: size_t max_backoff_ms, size_t max_tries); - std::vector loadDataPartsFromDisk( - ThreadPool & pool, - size_t num_parts, - std::queue & parts_queue, - const MergeTreeSettingsPtr & settings); + std::vector loadDataPartsFromDisk(PartLoadingTreeNodes & parts_to_load); void loadDataPartsFromWAL(MutableDataPartsVector & parts_from_wal); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 5ea99009756..33aea358078 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -143,8 +143,6 @@ struct Settings; M(Bool, ttl_only_drop_parts, false, "Only drop altogether the expired parts and not partially prune them.", 0) \ M(Bool, materialize_ttl_recalculate_only, false, "Only recalculate ttl info when MATERIALIZE TTL", 0) \ M(Bool, enable_mixed_granularity_parts, true, "Enable parts with adaptive and non adaptive granularity", 0) \ - M(MaxThreads, max_part_loading_threads, 0, "The number of threads to load data parts at startup.", 0) \ - M(MaxThreads, max_part_removal_threads, 0, "The number of threads for concurrent removal of inactive data parts. One is usually enough, but in 'Google Compute Environment SSD Persistent Disks' file removal (unlink) operation is extraordinarily slow and you probably have to increase this number (recommended is up to 16).", 0) \ M(UInt64, concurrent_part_removal_threshold, 100, "Activate concurrent part removal (see 'max_part_removal_threads') only if the number of inactive data parts is at least this.", 0) \ M(UInt64, zero_copy_concurrent_part_removal_max_split_times, 5, "Max recursion depth for splitting independent Outdated parts ranges into smaller subranges (highly not recommended to change)", 0) \ M(Float, zero_copy_concurrent_part_removal_max_postpone_ratio, static_cast(0.05), "Max percentage of top level parts to postpone removal in order to get smaller independent ranges (highly not recommended to change)", 0) \ @@ -192,6 +190,9 @@ struct Settings; M(UInt64, write_ahead_log_bytes_to_fsync, 100ULL * 1024 * 1024, "Obsolete setting, does nothing.", 0) \ M(UInt64, write_ahead_log_interval_ms_to_fsync, 100, "Obsolete setting, does nothing.", 0) \ M(Bool, in_memory_parts_insert_sync, false, "Obsolete setting, does nothing.", 0) \ + M(MaxThreads, max_part_loading_threads, 0, "Obsolete setting, does nothing.", 0) \ + M(MaxThreads, max_part_removal_threads, 0, "Obsolete setting, does nothing.", 0) \ + /// Settings that should not change after the creation of a table. 
/// NOLINTNEXTLINE #define APPLY_FOR_IMMUTABLE_MERGE_TREE_SETTINGS(M) \ diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index b65f044a13b..69fbdd5a64d 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -105,7 +105,7 @@ struct MergeTreeSource::AsyncReadingState AsyncReadingState() { control = std::make_shared(); - callback_runner = threadPoolCallbackRunner(IOThreadPool::get(), "MergeTreeRead"); + callback_runner = threadPoolCallbackRunner(getIOThreadPool().get(), "MergeTreeRead"); } ~AsyncReadingState() diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2d8aaec0f07..f1a7bcb71a2 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -766,7 +766,7 @@ public: DBMS_DEFAULT_BUFFER_SIZE, configuration_.request_settings, std::nullopt, - threadPoolCallbackRunner(IOThreadPool::get(), "S3ParallelWrite"), + threadPoolCallbackRunner(getIOThreadPool().get(), "S3ParallelWrite"), context->getWriteSettings()), compression_method, 3); diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 9f80b994051..97af4094e42 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -194,7 +194,7 @@ private: futures.push_back( scheduleFromThreadPool( std::move(worker), - IOThreadPool::get(), + getIOThreadPool().get(), "DP_BytesOnDisk")); } diff --git a/tests/queries/0_stateless/00988_parallel_parts_removal.sql b/tests/queries/0_stateless/00988_parallel_parts_removal.sql index bff9bbe6d8d..8f79276782b 100644 --- a/tests/queries/0_stateless/00988_parallel_parts_removal.sql +++ b/tests/queries/0_stateless/00988_parallel_parts_removal.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS mt; -CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS max_part_removal_threads = 16, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, old_parts_lifetime = 1, parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; +CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, old_parts_lifetime = 1, parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; SYSTEM STOP MERGES mt; diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index 13cd56e1924..a05515cf756 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS mt; -CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS max_part_loading_threads = 16, parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; +CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; SYSTEM STOP MERGES mt; diff --git a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh index f8f49816479..87153a4bd58 100755 --- a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh +++ b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh @@ -11,6 +11,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh +# The number of threads removing data parts should be between 1 and 129. 
+# Because max_parts_cleaning_thread_pool_size is 128 by default + $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -nm -q "create database ordinary_$CLICKHOUSE_DATABASE engine=Ordinary" # MergeTree @@ -22,7 +25,7 @@ $CLICKHOUSE_CLIENT -nm -q """ Engine=MergeTree() order by key partition by key%100 - settings max_part_removal_threads=10, concurrent_part_removal_threshold=99, min_bytes_for_wide_part=0; + settings concurrent_part_removal_threshold=99, min_bytes_for_wide_part=0; insert into data_01810 select * from numbers(100); drop table data_01810 settings log_queries=1; @@ -30,7 +33,7 @@ $CLICKHOUSE_CLIENT -nm -q """ -- sometimes the same thread can be used to remove part, due to ThreadPool, -- hence we cannot compare strictly. - select throwIf(not(length(thread_ids) between 1 and 11)) + select throwIf(not(length(thread_ids) between 1 and 129)) from system.query_log where event_date >= yesterday() and @@ -49,7 +52,7 @@ $CLICKHOUSE_CLIENT -nm -q """ Engine=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rep_data_01810', '1') order by key partition by key%100 - settings max_part_removal_threads=10, concurrent_part_removal_threshold=99, min_bytes_for_wide_part=0; + settings concurrent_part_removal_threshold=99, min_bytes_for_wide_part=0; SET insert_keeper_max_retries=1000; SET insert_keeper_retry_max_backoff_ms=10; @@ -60,7 +63,7 @@ $CLICKHOUSE_CLIENT -nm -q """ -- sometimes the same thread can be used to remove part, due to ThreadPool, -- hence we cannot compare strictly. - select throwIf(not(length(thread_ids) between 1 and 11)) + select throwIf(not(length(thread_ids) between 1 and 129)) from system.query_log where event_date >= yesterday() and diff --git a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql index 88fb2cdf9b1..5b9342972f4 100644 --- a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql +++ b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql @@ -8,7 +8,7 @@ drop table if exists rmt2; -- Disable compact parts, because we need hardlinks in mutations. 
create table rmt (n int, m int, k int) engine=ReplicatedMergeTree('/test/02432/{database}', '1') order by tuple() settings storage_policy = 's3_cache', allow_remote_fs_zero_copy_replication=1, - max_part_removal_threads=10, concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, + concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_replicated_merges_in_queue=0, max_replicated_mutations_in_queue=0, min_bytes_for_wide_part=0, min_rows_for_wide_part=0; insert into rmt(n, m) values (1, 42); @@ -38,7 +38,7 @@ select count(), sum(n), sum(m) from rmt; -- New table can assign merges/mutations and can remove old parts create table rmt2 (n int, m int, k String) engine=ReplicatedMergeTree('/test/02432/{database}', '2') order by tuple() settings storage_policy = 's3_cache', allow_remote_fs_zero_copy_replication=1, - max_part_removal_threads=10, concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, + concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, min_bytes_for_wide_part=0, min_rows_for_wide_part=0, max_replicated_merges_in_queue=1, old_parts_lifetime=0; @@ -66,4 +66,3 @@ drop table rmt2; system flush logs; select count() > 0 from system.text_log where yesterday() <= event_date and logger_name like '%' || currentDatabase() || '%' and message like '%Removing % parts from filesystem (concurrently): Parts:%'; select count() > 1, countDistinct(thread_id) > 1 from system.text_log where yesterday() <= event_date and logger_name like '%' || currentDatabase() || '%' and message like '%Removing % parts in blocks range%'; - From 36d298ceef12daf6689ae648b7efdedf2ee83d79 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Jun 2023 14:45:58 +0200 Subject: [PATCH 64/64] Fix commit for DiskObjectStorage (#50599) --- src/Disks/ObjectStorages/DiskObjectStorage.cpp | 3 ++- src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 129f1ab1ef7..005d115a277 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -596,7 +596,8 @@ void DiskObjectStorage::writeFileUsingBlobWritingFunction(const String & path, W { LOG_TEST(log, "Write file: {}", path); auto transaction = createObjectStorageTransaction(); - return transaction->writeFileUsingBlobWritingFunction(path, mode, std::move(write_blob_function)); + transaction->writeFileUsingBlobWritingFunction(path, mode, std::move(write_blob_function)); + transaction->commit(); } void DiskObjectStorage::applyNewSettings( diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index 257a6fdf2ea..bd66ada492f 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -710,8 +710,6 @@ void DiskObjectStorageTransaction::writeFileUsingBlobWritingFunction( metadata_transaction->createMetadataFile(path, blob_name, object_size); else metadata_transaction->addBlobToMetadata(path, blob_name, object_size); - - metadata_transaction->commit(); }
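[Editor's note] The last patch moves the commit call out of DiskObjectStorageTransaction::writeFileUsingBlobWritingFunction and into the caller, DiskObjectStorage::writeFileUsingBlobWritingFunction. Below is a minimal, hypothetical C++ sketch of that pattern; the types and names are invented stand-ins for illustration, not the real DiskObjectStorage interfaces.

#include <functional>
#include <iostream>
#include <string>
#include <vector>

/// Hypothetical stand-in for a metadata/storage transaction.
struct TransactionSketch
{
    std::vector<std::string> pending_ops;

    void writeFileUsingBlobWritingFunction(const std::string & path, const std::function<std::string()> & write_blob)
    {
        /// Record the operation, but do NOT commit here: the caller decides when the
        /// whole transaction is complete (this is the essence of the fix).
        pending_ops.push_back(path + " -> " + write_blob());
    }

    void commit()
    {
        for (const auto & op : pending_ops)
            std::cout << "committed: " << op << "\n";
        pending_ops.clear();
    }
};

/// Hypothetical caller, mirroring the fixed DiskObjectStorage method: it creates the
/// transaction, performs the write, then commits explicitly.
void writeFile(const std::string & path)
{
    TransactionSketch transaction;
    transaction.writeFileUsingBlobWritingFunction(path, [] { return std::string("blob_0001"); });
    transaction.commit();
}

int main()
{
    writeFile("store/abc/data.bin");
    return 0;
}

Keeping the commit at the caller gives a single, explicit commit point even when several operations are later batched into the same transaction, which is why the inner commit was removed.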