Merge branch 'master' into part-thresholds-override-more-sane

Alexey Milovidov 2022-10-02 02:26:21 +03:00 committed by GitHub
commit 0da9ed8a18
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
100 changed files with 1455 additions and 514 deletions


@ -1,4 +1,4 @@
if(ARCH_AMD64 OR ARCH_AARCH64)
if(ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE)
option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES})
elseif(ENABLE_BASE64)
message (${RECONFIGURE_MESSAGE_LEVEL} "base64 library is only supported on x86_64 and aarch64")
@ -26,7 +26,11 @@ if (ARCH_AMD64)
target_compile_options(_base64_avx PRIVATE -falign-loops -mavx)
target_compile_options(_base64_avx2 PRIVATE -falign-loops -mavx2)
else ()
target_compile_options(_base64_ssse3 PRIVATE -falign-loops)
if (ARCH_PPC64LE)
target_compile_options(_base64_ssse3 PRIVATE -D__SSSE3__ -falign-loops)
else()
target_compile_options(_base64_ssse3 PRIVATE -falign-loops)
endif()
endif ()
if (ARCH_AMD64)


@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="22.9.2.7"
ARG VERSION="22.9.3.18"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.


@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="22.9.2.7"
ARG VERSION="22.9.3.18"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image


@ -13,25 +13,28 @@ sysctl kernel.core_pattern='core.%e.%p-%P'
# Thread Fuzzer allows to check more permutations of possible thread scheduling
# and find more potential issues.
# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540
is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'")
if [ "$is_tsan_build" -eq "0" ]; then
    export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
    export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
    export THREAD_FUZZER_SLEEP_TIME_US=100000
    export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
    export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
    export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
    export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
    export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
    export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
    export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
    export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
    export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
    export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
    export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
    export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
fi
function install_packages()


@ -0,0 +1,56 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.3.13.80-lts (e2708b01fba) FIXME as compared to v22.3.12.19-lts (4a08f8a073b)
#### New Feature
* Backported in [#41264](https://github.com/ClickHouse/ClickHouse/issues/41264): Implemented automatic conversion of database engine from `Ordinary` to `Atomic`. Create empty `convert_ordinary_to_atomic` file in `flags` directory and all `Ordinary` databases will be converted automatically on next server start. Resolves [#39546](https://github.com/ClickHouse/ClickHouse/issues/39546). [#39933](https://github.com/ClickHouse/ClickHouse/pull/39933) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#40886](https://github.com/ClickHouse/ClickHouse/issues/40886): Add setting to disable limit on kafka_num_consumers. Closes [#40331](https://github.com/ClickHouse/ClickHouse/issues/40331). [#40670](https://github.com/ClickHouse/ClickHouse/pull/40670) ([Kruglov Pavel](https://github.com/Avogar)).
#### Bug Fix
* Backported in [#41273](https://github.com/ClickHouse/ClickHouse/issues/41273): Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Build/Testing/Packaging Improvement
* Backported in [#41557](https://github.com/ClickHouse/ClickHouse/issues/41557): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#40745](https://github.com/ClickHouse/ClickHouse/issues/40745): Fix cast lowcard of nullable in JoinSwitcher, close [#37385](https://github.com/ClickHouse/ClickHouse/issues/37385). [#37453](https://github.com/ClickHouse/ClickHouse/pull/37453) ([Vladimir C](https://github.com/vdimir)).
* Backported in [#41812](https://github.com/ClickHouse/ClickHouse/issues/41812): Update `simdjson`. This fixes [#38621](https://github.com/ClickHouse/ClickHouse/issues/38621). [#38838](https://github.com/ClickHouse/ClickHouse/pull/38838) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41320](https://github.com/ClickHouse/ClickHouse/issues/41320): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)).
* Backported in [#40904](https://github.com/ClickHouse/ClickHouse/issues/40904): Fix potential deadlock in WriteBufferFromS3 during task scheduling failure. [#40070](https://github.com/ClickHouse/ClickHouse/pull/40070) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#41916](https://github.com/ClickHouse/ClickHouse/issues/41916): Fix a rare bug with column TTL for the MergeTree engine family: in case of a repeated vertical merge the error `Cannot unlink file ColumnName.bin ... No such file or directory.` could happen. [#40346](https://github.com/ClickHouse/ClickHouse/pull/40346) ([alesapin](https://github.com/alesapin)).
* Backported in [#40903](https://github.com/ClickHouse/ClickHouse/issues/40903): Proxy resolver stops on the first successful request to the endpoint. [#40353](https://github.com/ClickHouse/ClickHouse/pull/40353) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#40663](https://github.com/ClickHouse/ClickHouse/issues/40663): Fix potential data loss due to a bug in the AWS SDK (https://github.com/aws/aws-sdk-cpp/issues/658). The bug can be triggered only when ClickHouse is used over S3. [#40506](https://github.com/ClickHouse/ClickHouse/pull/40506) ([alesapin](https://github.com/alesapin)).
* Backported in [#40901](https://github.com/ClickHouse/ClickHouse/issues/40901): Fix memory leak while pushing to MVs w/o query context (from Kafka/...). [#40732](https://github.com/ClickHouse/ClickHouse/pull/40732) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#41637](https://github.com/ClickHouse/ClickHouse/issues/41637): Fix possible segfaults, heap-use-after-free and memory leak in aggregate function combinators. Closes [#40848](https://github.com/ClickHouse/ClickHouse/issues/40848). [#41083](https://github.com/ClickHouse/ClickHouse/pull/41083) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#41664](https://github.com/ClickHouse/ClickHouse/issues/41664): Queries with an `OFFSET` clause in a subquery and a `WHERE` clause in the outer query might return an incorrect result; it's fixed. Fixes [#40416](https://github.com/ClickHouse/ClickHouse/issues/40416). [#41280](https://github.com/ClickHouse/ClickHouse/pull/41280) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#41356](https://github.com/ClickHouse/ClickHouse/issues/41356): Add column type check before UUID insertion in MsgPack format. [#41309](https://github.com/ClickHouse/ClickHouse/pull/41309) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#41807](https://github.com/ClickHouse/ClickHouse/issues/41807): Malicious data in Native format might cause a crash. [#41441](https://github.com/ClickHouse/ClickHouse/pull/41441) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41804](https://github.com/ClickHouse/ClickHouse/issues/41804): The aggregate function `categorialInformationValue` had incorrectly defined properties, which might cause a null pointer dereference at runtime. This closes [#41443](https://github.com/ClickHouse/ClickHouse/issues/41443). [#41449](https://github.com/ClickHouse/ClickHouse/pull/41449) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41503](https://github.com/ClickHouse/ClickHouse/issues/41503): Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41639](https://github.com/ClickHouse/ClickHouse/issues/41639): Fix possible `pipeline stuck` exception for queries with `OFFSET`. The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). [#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41899](https://github.com/ClickHouse/ClickHouse/issues/41899): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Backported in [#41321](https://github.com/ClickHouse/ClickHouse/issues/41321): Fix a bug in function `if` when the resulting column type differs from the resulting data type, which led to logical errors like `Logical error: 'Bad cast from type DB::ColumnVector<int> to DB::ColumnVector<long>'.`. Closes [#35367](https://github.com/ClickHouse/ClickHouse/issues/35367). [#35476](https://github.com/ClickHouse/ClickHouse/pull/35476) ([Kruglov Pavel](https://github.com/Avogar)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* use ROBOT_CLICKHOUSE_COMMIT_TOKEN for create-pull-request [#40067](https://github.com/ClickHouse/ClickHouse/pull/40067) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* use input token instead of env var [#40421](https://github.com/ClickHouse/ClickHouse/pull/40421) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* DNSResolver remove AI_V4MAPPED, AI_ALL hints [#40502](https://github.com/ClickHouse/ClickHouse/pull/40502) ([Maksim Kita](https://github.com/kitaisreal)).
* Migrate artifactory [#40831](https://github.com/ClickHouse/ClickHouse/pull/40831) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Docker server version [#41256](https://github.com/ClickHouse/ClickHouse/pull/41256) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Download ccache from release PRs for backports [#41328](https://github.com/ClickHouse/ClickHouse/pull/41328) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Remove `-WithTerminatingZero` methods [#41431](https://github.com/ClickHouse/ClickHouse/pull/41431) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove trash from Field [#41457](https://github.com/ClickHouse/ClickHouse/pull/41457) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Mask some information in logs. [#41474](https://github.com/ClickHouse/ClickHouse/pull/41474) ([Vitaly Baranov](https://github.com/vitlibar)).
* Build latest tags ONLY from master branch [#41567](https://github.com/ClickHouse/ClickHouse/pull/41567) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix the typo preventing building latest images [#41769](https://github.com/ClickHouse/ClickHouse/pull/41769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).


@ -0,0 +1,52 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.7.6.74-stable (c00ffb3c11a) FIXME as compared to v22.7.5.13-stable (6f48d2d1f59)
#### New Feature
* Backported in [#40869](https://github.com/ClickHouse/ClickHouse/issues/40869): Add setting to disable limit on kafka_num_consumers. Closes [#40331](https://github.com/ClickHouse/ClickHouse/issues/40331). [#40670](https://github.com/ClickHouse/ClickHouse/pull/40670) ([Kruglov Pavel](https://github.com/Avogar)).
#### Bug Fix
* Backported in [#41228](https://github.com/ClickHouse/ClickHouse/issues/41228): Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Build/Testing/Packaging Improvement
* Backported in [#41559](https://github.com/ClickHouse/ClickHouse/issues/41559): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#41283](https://github.com/ClickHouse/ClickHouse/issues/41283): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)).
* Backported in [#40865](https://github.com/ClickHouse/ClickHouse/issues/40865): Fix crash while parsing values of type `Object` that contain arrays of variadic dimension. [#40483](https://github.com/ClickHouse/ClickHouse/pull/40483) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#40804](https://github.com/ClickHouse/ClickHouse/issues/40804): During insertion of a new query into the `ProcessList`, allocations happen. If we reach the memory limit during these allocations, we cannot use `OvercommitTracker`, because `ProcessList::mutex` is already acquired. Fixes [#40611](https://github.com/ClickHouse/ClickHouse/issues/40611). [#40677](https://github.com/ClickHouse/ClickHouse/pull/40677) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#40810](https://github.com/ClickHouse/ClickHouse/issues/40810): In [#40595](https://github.com/ClickHouse/ClickHouse/issues/40595) it was reported that the `host_regexp` functionality was not working properly with name-to-address resolution in `/etc/hosts`. It's fixed. [#40769](https://github.com/ClickHouse/ClickHouse/pull/40769) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#41134](https://github.com/ClickHouse/ClickHouse/issues/41134): Fix access rights for `DESCRIBE TABLE url()` and some other `DESCRIBE TABLE <table_function>()`. [#40975](https://github.com/ClickHouse/ClickHouse/pull/40975) ([Vitaly Baranov](https://github.com/vitlibar)).
* Backported in [#41617](https://github.com/ClickHouse/ClickHouse/issues/41617): Fix possible segfaults, heap-use-after-free and memory leak in aggregate function combinators. Closes [#40848](https://github.com/ClickHouse/ClickHouse/issues/40848). [#41083](https://github.com/ClickHouse/ClickHouse/pull/41083) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#41666](https://github.com/ClickHouse/ClickHouse/issues/41666): Queries with an `OFFSET` clause in a subquery and a `WHERE` clause in the outer query might return an incorrect result; it's fixed. Fixes [#40416](https://github.com/ClickHouse/ClickHouse/issues/40416). [#41280](https://github.com/ClickHouse/ClickHouse/pull/41280) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#41361](https://github.com/ClickHouse/ClickHouse/issues/41361): Fix incorrect logical error `Expected relative path` in disk object storage. Related to [#41246](https://github.com/ClickHouse/ClickHouse/issues/41246). [#41297](https://github.com/ClickHouse/ClickHouse/pull/41297) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#41358](https://github.com/ClickHouse/ClickHouse/issues/41358): Add column type check before UUID insertion in MsgPack format. [#41309](https://github.com/ClickHouse/ClickHouse/pull/41309) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#41808](https://github.com/ClickHouse/ClickHouse/issues/41808): Malicious data in Native format might cause a crash. [#41441](https://github.com/ClickHouse/ClickHouse/pull/41441) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41805](https://github.com/ClickHouse/ClickHouse/issues/41805): The aggregate function `categorialInformationValue` had incorrectly defined properties, which might cause a null pointer dereference at runtime. This closes [#41443](https://github.com/ClickHouse/ClickHouse/issues/41443). [#41449](https://github.com/ClickHouse/ClickHouse/pull/41449) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41505](https://github.com/ClickHouse/ClickHouse/issues/41505): Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41644](https://github.com/ClickHouse/ClickHouse/issues/41644): Queries with `ORDER BY` and `1500 <= LIMIT <= max_block_size` could return an incorrect result with missing rows from the top. Fixes [#41182](https://github.com/ClickHouse/ClickHouse/issues/41182). [#41576](https://github.com/ClickHouse/ClickHouse/pull/41576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41641](https://github.com/ClickHouse/ClickHouse/issues/41641): Fix possible `pipeline stuck` exception for queries with `OFFSET`. The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). [#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41900](https://github.com/ClickHouse/ClickHouse/issues/41900): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* use ROBOT_CLICKHOUSE_COMMIT_TOKEN for create-pull-request [#40067](https://github.com/ClickHouse/ClickHouse/pull/40067) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* use input token instead of env var [#40421](https://github.com/ClickHouse/ClickHouse/pull/40421) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Migrate artifactory [#40831](https://github.com/ClickHouse/ClickHouse/pull/40831) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* CaresPTRResolver small safety improvement [#40890](https://github.com/ClickHouse/ClickHouse/pull/40890) ([Arthur Passos](https://github.com/arthurpassos)).
* Docker server version [#41256](https://github.com/ClickHouse/ClickHouse/pull/41256) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Download ccache from release PRs for backports [#41328](https://github.com/ClickHouse/ClickHouse/pull/41328) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Increase open files limit [#41345](https://github.com/ClickHouse/ClickHouse/pull/41345) ([Eugene Konkov](https://github.com/ekonkov)).
* Remove `-WithTerminatingZero` methods [#41431](https://github.com/ClickHouse/ClickHouse/pull/41431) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove trash from Field [#41457](https://github.com/ClickHouse/ClickHouse/pull/41457) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Mask some information in logs. [#41474](https://github.com/ClickHouse/ClickHouse/pull/41474) ([Vitaly Baranov](https://github.com/vitlibar)).
* Build latest tags ONLY from master branch [#41567](https://github.com/ClickHouse/ClickHouse/pull/41567) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix the typo preventing building latest images [#41769](https://github.com/ClickHouse/ClickHouse/pull/41769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Revert "ColumnVector: optimize UInt8 index with AVX512VBMI ([#41247](https://github.com/ClickHouse/ClickHouse/issues/41247))" [#41797](https://github.com/ClickHouse/ClickHouse/pull/41797) ([Alexey Milovidov](https://github.com/alexey-milovidov)).


@ -0,0 +1,56 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.8.6.71-lts (7bf38a43e30) FIXME as compared to v22.8.5.29-lts (74ffb843807)
#### Improvement
* Backported in [#41507](https://github.com/ClickHouse/ClickHouse/issues/41507): Fix incompatibility of the cache after switching the setting `do_no_evict_index_and_mark_files` from 1 to 0 or from 0 to 1. [#41330](https://github.com/ClickHouse/ClickHouse/pull/41330) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### Bug Fix
* Backported in [#41229](https://github.com/ClickHouse/ClickHouse/issues/41229): Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Build/Testing/Packaging Improvement
* Backported in [#41560](https://github.com/ClickHouse/ClickHouse/issues/41560): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#41284](https://github.com/ClickHouse/ClickHouse/issues/41284): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)).
* Backported in [#41837](https://github.com/ClickHouse/ClickHouse/issues/41837): Fix vertical merge of parts with lightweight deleted rows. [#40559](https://github.com/ClickHouse/ClickHouse/pull/40559) ([Alexander Gololobov](https://github.com/davenger)).
* Backported in [#41618](https://github.com/ClickHouse/ClickHouse/issues/41618): Fix possible segfaults, heap-use-after-free and memory leak in aggregate function combinators. Closes [#40848](https://github.com/ClickHouse/ClickHouse/issues/40848). [#41083](https://github.com/ClickHouse/ClickHouse/pull/41083) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#41667](https://github.com/ClickHouse/ClickHouse/issues/41667): Queries with an `OFFSET` clause in a subquery and a `WHERE` clause in the outer query might return an incorrect result; it's fixed. Fixes [#40416](https://github.com/ClickHouse/ClickHouse/issues/40416). [#41280](https://github.com/ClickHouse/ClickHouse/pull/41280) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#41685](https://github.com/ClickHouse/ClickHouse/issues/41685): Fix possible wrong query result with `query_plan_optimize_primary_key` enabled. Fixes [#40599](https://github.com/ClickHouse/ClickHouse/issues/40599). [#41281](https://github.com/ClickHouse/ClickHouse/pull/41281) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41362](https://github.com/ClickHouse/ClickHouse/issues/41362): Fix incorrect logical error `Expected relative path` in disk object storage. Related to [#41246](https://github.com/ClickHouse/ClickHouse/issues/41246). [#41297](https://github.com/ClickHouse/ClickHouse/pull/41297) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#41359](https://github.com/ClickHouse/ClickHouse/issues/41359): Add column type check before UUID insertion in MsgPack format. [#41309](https://github.com/ClickHouse/ClickHouse/pull/41309) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#41596](https://github.com/ClickHouse/ClickHouse/issues/41596): Fix possible deadlock with async_socket_for_remote/use_hedged_requests and parallel KILL. [#41343](https://github.com/ClickHouse/ClickHouse/pull/41343) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#41521](https://github.com/ClickHouse/ClickHouse/issues/41521): Since 22.8, the `ON CLUSTER` clause is ignored if the database is `Replicated` and the cluster name and database name are the same. Because of this, `DROP PARTITION ON CLUSTER` worked in an unexpected way with `Replicated`. It's fixed: now the `ON CLUSTER` clause is ignored only for queries that are replicated on the database level. Fixes [#41299](https://github.com/ClickHouse/ClickHouse/issues/41299). [#41390](https://github.com/ClickHouse/ClickHouse/pull/41390) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#41480](https://github.com/ClickHouse/ClickHouse/issues/41480): Malicious data in Native format might cause a crash. [#41441](https://github.com/ClickHouse/ClickHouse/pull/41441) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41806](https://github.com/ClickHouse/ClickHouse/issues/41806): The aggregate function `categorialInformationValue` had incorrectly defined properties, which might cause a null pointer dereference at runtime. This closes [#41443](https://github.com/ClickHouse/ClickHouse/issues/41443). [#41449](https://github.com/ClickHouse/ClickHouse/pull/41449) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41506](https://github.com/ClickHouse/ClickHouse/issues/41506): Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41581](https://github.com/ClickHouse/ClickHouse/issues/41581): Fix possible hung/deadlock on query cancellation (`KILL QUERY` or server shutdown). [#41467](https://github.com/ClickHouse/ClickHouse/pull/41467) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#41645](https://github.com/ClickHouse/ClickHouse/issues/41645): Queries with `ORDER BY` and `1500 <= LIMIT <= max_block_size` could return an incorrect result with missing rows from the top. Fixes [#41182](https://github.com/ClickHouse/ClickHouse/issues/41182). [#41576](https://github.com/ClickHouse/ClickHouse/pull/41576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41642](https://github.com/ClickHouse/ClickHouse/issues/41642): Fix possible `pipeline stuck` exception for queries with `OFFSET`. The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). [#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41901](https://github.com/ClickHouse/ClickHouse/issues/41901): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41836](https://github.com/ClickHouse/ClickHouse/issues/41836): Don't allow creating or altering MergeTree tables with the virtual column name `_row_exists`, which is reserved for lightweight delete. Fixes [#41716](https://github.com/ClickHouse/ClickHouse/issues/41716). [#41763](https://github.com/ClickHouse/ClickHouse/pull/41763) ([Jianmei Zhang](https://github.com/zhangjmruc)).
* Backported in [#41890](https://github.com/ClickHouse/ClickHouse/issues/41890): Old versions of the Replicated database engine don't have a special marker in [Zoo]Keeper. We need to check only whether the node contains some obscure data instead of the special marker. [#41875](https://github.com/ClickHouse/ClickHouse/pull/41875) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix stress test after [#40420](https://github.com/ClickHouse/ClickHouse/issues/40420) [#40608](https://github.com/ClickHouse/ClickHouse/pull/40608) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Change default in one cache setting [#41139](https://github.com/ClickHouse/ClickHouse/pull/41139) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Docker server version [#41256](https://github.com/ClickHouse/ClickHouse/pull/41256) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix download_binary, use proper version and commit [#41260](https://github.com/ClickHouse/ClickHouse/pull/41260) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Download ccache from release PRs for backports [#41328](https://github.com/ClickHouse/ClickHouse/pull/41328) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Increase open files limit [#41345](https://github.com/ClickHouse/ClickHouse/pull/41345) ([Eugene Konkov](https://github.com/ekonkov)).
* Remove `-WithTerminatingZero` methods [#41431](https://github.com/ClickHouse/ClickHouse/pull/41431) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove trash from Field [#41457](https://github.com/ClickHouse/ClickHouse/pull/41457) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Mask some information in logs. [#41474](https://github.com/ClickHouse/ClickHouse/pull/41474) ([Vitaly Baranov](https://github.com/vitlibar)).
* Build latest tags ONLY from master branch [#41567](https://github.com/ClickHouse/ClickHouse/pull/41567) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix debug build after [#41507](https://github.com/ClickHouse/ClickHouse/issues/41507) [#41597](https://github.com/ClickHouse/ClickHouse/pull/41597) ([Dmitry Novik](https://github.com/novikd)).
* Revert of "Revert the revert of "ColumnVector: optimize filter with AVX512 VBMI2 compress store" [#40033](https://github.com/ClickHouse/ClickHouse/issues/40033)" [#41752](https://github.com/ClickHouse/ClickHouse/pull/41752) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix the typo preventing building latest images [#41769](https://github.com/ClickHouse/ClickHouse/pull/41769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Revert "ColumnVector: optimize UInt8 index with AVX512VBMI ([#41247](https://github.com/ClickHouse/ClickHouse/issues/41247))" [#41797](https://github.com/ClickHouse/ClickHouse/pull/41797) ([Alexey Milovidov](https://github.com/alexey-milovidov)).


@ -0,0 +1,23 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.9.3.18-stable (0cb4b15d2fa) FIXME as compared to v22.9.2.7-stable (362e2cefcef)
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#41902](https://github.com/ClickHouse/ClickHouse/issues/41902): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41863](https://github.com/ClickHouse/ClickHouse/issues/41863): 22.9 might fail to start up a `ReplicatedMergeTree` table if that table was created by 20.3 or an older version and was never altered; it's fixed. Fixes [#41742](https://github.com/ClickHouse/ClickHouse/issues/41742). [#41796](https://github.com/ClickHouse/ClickHouse/pull/41796) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#41892](https://github.com/ClickHouse/ClickHouse/issues/41892): Fix compact parts with compressed marks setting. Fixes [#41783](https://github.com/ClickHouse/ClickHouse/issues/41783) and [#41746](https://github.com/ClickHouse/ClickHouse/issues/41746). [#41823](https://github.com/ClickHouse/ClickHouse/pull/41823) ([alesapin](https://github.com/alesapin)).
* Backported in [#41891](https://github.com/ClickHouse/ClickHouse/issues/41891): Old versions of the Replicated database engine don't have a special marker in [Zoo]Keeper. We need to check only whether the node contains some obscure data instead of the special marker. [#41875](https://github.com/ClickHouse/ClickHouse/pull/41875) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Revert of "Revert the revert of "ColumnVector: optimize filter with AVX512 VBMI2 compress store" [#40033](https://github.com/ClickHouse/ClickHouse/issues/40033)" [#41752](https://github.com/ClickHouse/ClickHouse/pull/41752) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update 02354_annoy.sql [#41767](https://github.com/ClickHouse/ClickHouse/pull/41767) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix the typo preventing building latest images [#41769](https://github.com/ClickHouse/ClickHouse/pull/41769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Revert "ColumnVector: optimize UInt8 index with AVX512VBMI ([#41247](https://github.com/ClickHouse/ClickHouse/issues/41247))" [#41797](https://github.com/ClickHouse/ClickHouse/pull/41797) ([Alexey Milovidov](https://github.com/alexey-milovidov)).


@ -5,11 +5,49 @@ slug: /en/development/integrating_rust_libraries
Rust library integration will be described based on BLAKE3 hash-function integration.
The first step is forking the library and making the necessary changes for Rust and C/C++ compatibility.
The first step of integration is to add the library to the /rust folder. To do this, you need to create an empty Rust project and include the required library in Cargo.toml. It is also necessary to configure the new library to be compiled as a static library by adding `crate-type = ["staticlib"]` to Cargo.toml.
After forking the library repository, you need to change the target settings in the Cargo.toml file. Firstly, you need to switch the build to a static library. Secondly, you need to add the cbindgen crate to the crate list. We will use it later to generate a C header automatically.
Next, you need to link the library to CMake using the Corrosion library. The first step is to add the library folder to the CMakeLists.txt inside the /rust folder. After that, you should add a CMakeLists.txt file to the library directory. In it, you need to call the Corrosion import function. These lines were used to import BLAKE3:
The next step is creating or editing the build.rs script for your library and enabling cbindgen to generate the header during the library build. These lines were added to the BLAKE3 build script for the same purpose:
```
corrosion_import_crate(MANIFEST_PATH Cargo.toml NO_STD)
target_include_directories(_ch_rust_blake3 INTERFACE include)
add_library(ch_rust::blake3 ALIAS _ch_rust_blake3)
```
Thus, we will create a correct CMake target using Corrosion and then rename it with a more convenient name. Note that the name `_ch_rust_blake3` comes from Cargo.toml, where it is used as the project name (`name = "_ch_rust_blake3"`).
Since Rust data types are not compatible with C/C++ data types, we will use our empty library project to create shim methods that convert data received from C/C++, call the library methods, and convert the output data back. For example, this method was written for BLAKE3:
```
#[no_mangle]
pub unsafe extern "C" fn blake3_apply_shim(
begin: *const c_char,
_size: u32,
out_char_data: *mut u8,
) -> *mut c_char {
if begin.is_null() {
let err_str = CString::new("input was a null pointer").unwrap();
return err_str.into_raw();
}
let mut hasher = blake3::Hasher::new();
let input_bytes = CStr::from_ptr(begin);
let input_res = input_bytes.to_bytes();
hasher.update(input_res);
let mut reader = hasher.finalize_xof();
reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN));
std::ptr::null_mut()
}
```
This method takes a C-compatible string, its size, and an output string pointer as input. It then converts the C-compatible inputs into the types used by the actual library methods and calls them. After that, it converts the library methods' outputs back into C-compatible types. In this particular case the library supports writing directly into the pointer via the fill() method, so that conversion was not needed. The main advice here is to create fewer methods, so you need fewer conversions on each method call and don't create much overhead.
It is worth noting that the `#[no_mangle]` attribute and `extern "C"` are mandatory for all such methods. Without them, it will not be possible to perform a correct C/C++-compatible compilation. Moreover, they are necessary for the next step of the integration.
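On the C++ side the shim is visible through the generated header as a plain C function. As a hedged illustration only (the caller below, its error handling, and the constant name are assumptions for this sketch, not code from ClickHouse), calling it might look like this:
```
#include <cstdint>
#include <stdexcept>
#include <string>

// Declaration as the cbindgen-generated header would expose it.
extern "C" char * blake3_apply_shim(const char * begin, uint32_t size, uint8_t * out_char_data);

// BLAKE3 always produces a 32-byte digest.
constexpr size_t BLAKE3_DIGEST_SIZE = 32;

std::string blake3Hash(const std::string & input)
{
    uint8_t digest[BLAKE3_DIGEST_SIZE];
    // The Rust shim reads the input as a NUL-terminated string (CStr::from_ptr) and ignores the size argument.
    // It returns nullptr on success, or an error message allocated on the Rust side.
    if (char * error = blake3_apply_shim(input.c_str(), static_cast<uint32_t>(input.size()), digest))
        throw std::runtime_error(error); // NOTE: a real integration must hand this string back to Rust to be freed.
    return {reinterpret_cast<const char *>(digest), BLAKE3_DIGEST_SIZE};
}
```
The important property is that only plain C types cross the boundary; anything allocated on the Rust side (such as the error string) has to be returned to Rust for deallocation.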
After writing the code for the shim methods, we need to prepare the header file for the library. This can be done manually, or you can use the cbindgen library for auto-generation. In case of using cbindgen, you will need to write a build.rs build script and include cbindgen as a build-dependency.
An example of a build script that can auto-generate a header file:
```
let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
@ -27,39 +65,9 @@ The next step is creating or editing the build.rs script for your library - and
}
```
As you can see, the script sets the output directory and launches header generation.
The next step is to add CMake files to the library directory so it can be built with the other submodules. As you can see, the BLAKE3 main directory contains two CMake files - CMakeLists.txt and build_rust_lib.cmake. The second one is a function which calls cargo build and sets all the paths needed for the library build. You should copy it to your library and then adjust the cargo flags and other settings for your library's needs.
When finished with the CMake configuration, you should move on to creating a C/C++ compatible API for your library. Let us see BLAKE3's method blake3_apply_shim:
```
#[no_mangle]
pub unsafe extern "C" fn blake3_apply_shim(
begin: *const c_char,
_size: u32,
out_char_data: *mut u8,
) -> *mut c_char {
if begin.is_null() {
let err_str = CString::new("input was a null pointer").unwrap();
return err_str.into_raw();
}
let mut hasher = Hasher::new();
let input_bytes = CStr::from_ptr(begin);
let input_res = input_bytes.to_bytes();
hasher.update(input_res);
let mut reader = hasher.finalize_xof();
reader.fill(std::slice::from_raw_parts_mut(out_char_data, OUT_LEN));
std::ptr::null_mut()
}
```
This method takes a C-compatible string, its size, and an output string pointer as input. It then converts the C-compatible inputs into the types used by the actual library methods and calls them. After that, it converts the library methods' outputs back into C-compatible types. In this particular case the library supports writing directly into the pointer via the fill() method, so that conversion was not needed. The main advice here is to create fewer methods, so you need fewer conversions on each method call and don't create much overhead.
Also, you should use the #[no_mangle] attribute and `extern "C"` for every such C-compatible method. Without them, the library can compile incorrectly and cbindgen won't launch header autogeneration.
After all these steps you can test your library in a small project to find any problems with compatibility or header generation. If any problems occur during header generation, you can try to configure it with the cbindgen.toml file (you can find an example of it in the BLAKE3 directory or a template here: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml)). If everything works correctly, you can finally integrate its methods into ClickHouse.
After all these steps you can test your library in a small project to find any problems with compatibility or header generation. If any problems occur during header generation, you can try to configure it with the cbindgen.toml file (you can find a template here: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml)).
In addition, some problems with integration are worth noting here:
1) Some architectures may require special cargo flags or build.rs configurations, so you may want to test cross-compilation for different platforms first.
2) MemorySanitizer can cause false-positive reports as it's unable to see whether some variables in Rust are initialized or not. It was solved by writing a method with a more explicit definition for some variables; this implementation of the method is slower and is used only to fix MemorySanitizer builds.
It is worth noting the problem that occurred when integrating BLAKE3:
MemorySanitizer can cause false-positive reports as it's unable to see whether some variables in Rust are initialized or not. It was solved by writing a method with a more explicit definition for some variables; this implementation of the method is slower and is used only to fix MemorySanitizer builds.


@ -65,7 +65,9 @@ CREATE TABLE criteo
icat24 UInt32,
icat25 UInt32,
icat26 UInt32
) ENGINE = MergeTree(date, intHash32(icat1), (date, intHash32(icat1)), 8192)
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(date)
ORDER BY (date, icat1)
```
Transform data from the raw log and put it in the second table:


@ -1498,7 +1498,7 @@ If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored.
- `move_factor` is ignored.
- `keep_free_space_bytes` is ignored.
- `max_data_part_size_bytes` is ignored.
- Уou must have exactly one volume in that policy.
- Policy should have exactly one volume with local disks.
:::
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}


@ -6,11 +6,50 @@ slug: /ru/development/integrating_rust_libraries
Library integration will be described based on the work done for the BLAKE3 library.
The first step of integration is forking the library in order to make further changes for compatibility of its Rust methods with C/C++.
The first step of integration is adding the library to the /rust folder. To do this, create an empty Rust project in that folder and include the required library in Cargo.toml. The new library also has to be compiled as a static library; for that, add `crate-type = ["staticlib"]` to Cargo.toml.
In the fork you will need to change the Cargo.toml configuration, switching the target to a static library. You also need to add the cbindgen crate for later use during the build.
Next, the library has to be linked to CMake. For this, the Corrosion library is used in ClickHouse. The first step is to add the folder with the new library to the root CMakeLists.txt of the /rust folder. After that, add a CMakeLists.txt file to the library directory in which the Corrosion import function is called. As an example, here is the file from BLAKE3:
You need to create or edit the build.rs build script, adding to it an invocation of cbindgen, the auto-generator of .h header files. An example of such an invocation can be seen in the build.rs for BLAKE3:
```
corrosion_import_crate(MANIFEST_PATH Cargo.toml NO_STD)
target_include_directories(_ch_rust_blake3 INTERFACE include)
add_library(ch_rust::blake3 ALIAS _ch_rust_blake3)
```
Thus, with the help of Corrosion we create a correct CMake target and then rename it with a more understandable name. Note that the name `_ch_rust_blake3` comes from Cargo.toml, where it serves as the project name (`name = "_ch_rust_blake3"`).
Since Rust data types are not compatible with C/C++ data types, in our project we define an interface of shim methods that convert the data received from C/C++, call the library methods, and then convert the returned data back. In particular, consider this method written for BLAKE3:
```
#[no_mangle]
pub unsafe extern "C" fn blake3_apply_shim(
begin: *const c_char,
_size: u32,
out_char_data: *mut u8,
) -> *mut c_char {
if begin.is_null() {
let err_str = CString::new("input was a null pointer").unwrap();
return err_str.into_raw();
}
let mut hasher = blake3::Hasher::new();
let input_bytes = CStr::from_ptr(begin);
let input_res = input_bytes.to_bytes();
hasher.update(input_res);
let mut reader = hasher.finalize_xof();
reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN));
std::ptr::null_mut()
}
```
The method takes as input a string in a C-compatible format, its size, and a pointer into which the result will be written. In addition, to be able to report an error, the method returns an error string as its result (and a null pointer if there are no errors). C-compatible types are not used in the BLAKE3 methods, so they are converted via the corresponding structures and methods into the formats familiar to Rust. Then the original library methods are invoked. Their result should be converted back into C-compatible structures; in this case, however, the reverse conversion can be avoided, since the library supports writing directly through the *mut u8 pointer.
It is also worth noting that the #[no_mangle] attribute and the extern "C" specifier are mandatory for all such methods. Without them, a correct C/C++-compatible compilation is not possible. Moreover, they are necessary for the next stage of integrating the library.
After writing the code of the shim methods, we need to prepare a header file for the library. This can be done manually, or you can use the cbindgen library for auto-generation. If cbindgen is used, we will need to write a build.rs build script and include cbindgen as a build-dependency.
An example of a build script that can auto-generate a header file:
```
let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
@ -28,39 +67,7 @@ slug: /ru/development/integrating_rust_libraries
}
```
The script sets the directory where the header file is created and at the end runs the cbindgen generation method.
If problems arise with header generation, you may need to work with the cbindgen configuration via the cbindgen.toml file, taking the original template from the cbindgen developer: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml).
Next, the library has to be linked to CMake. For BLAKE3, two files were created for this: CMakeLists.txt and build_rust_lib.cmake, a file containing a function that runs cargo build as a target. The latter should be copied into the library being integrated and edited according to the required build parameters: add flags or any settings for different architectures.
Having finished the CMake configuration, you can move on to creating the shim methods that ensure compatibility between the library and the rest of the ClickHouse code. In particular, consider this method written for BLAKE3:
```
#[no_mangle]
pub unsafe extern "C" fn blake3_apply_shim(
begin: *const c_char,
_size: u32,
out_char_data: *mut u8,
) -> *mut c_char {
if begin.is_null() {
let err_str = CString::new("input was a null pointer").unwrap();
return err_str.into_raw();
}
let mut hasher = Hasher::new();
let input_bytes = CStr::from_ptr(begin);
let input_res = input_bytes.to_bytes();
hasher.update(input_res);
let mut reader = hasher.finalize_xof();
reader.fill(std::slice::from_raw_parts_mut(out_char_data, OUT_LEN));
std::ptr::null_mut()
}
```
The method takes as input a string in a C-compatible format, its size, and a pointer into which the result will be written. In addition, to be able to report an error, the method returns an error string as its result (and a null pointer if there are no errors). C-compatible types are not used in the BLAKE3 methods, so they are converted via the corresponding structures and methods into the formats familiar to Rust. Then the original library methods are invoked. Their result should be converted back into C-compatible structures; in this case, however, the reverse conversion can be avoided, since the library supports writing directly through the *mut u8 pointer.
It is also worth noting that the #[no_mangle] attribute and the extern "C" specifier are mandatory for all such methods. Without them, a correct C/C++-compatible compilation and header auto-generation are not possible.
After these steps you can test the compilation and operation of the methods on a small project to uncover incompatibilities and errors. If problems arise with header generation, you may need to work with the cbindgen configuration via the cbindgen.toml file, which can be found either in BLAKE3 or by taking the original template from the cbindgen developer: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml).
In conclusion, it is worth noting a couple of problems that came up during the BLAKE3 integration:
1) Some architectures may require specific compilation settings in build.rs and in build_rust_lib.cmake due to their peculiarities.
2) MemorySanitizer does not understand memory initialization in Rust well, so to avoid false positives an alternative method was created for BLAKE3 that initializes memory more explicitly, but more slowly. It is compiled only for the MemorySanitizer build and does not make it into the release. More elegant ways to solve this problem probably exist, but they were not found during the BLAKE3 integration.
In conclusion, it is worth noting a problem that had to be dealt with during the BLAKE3 integration:
The C++ MemorySanitizer does not understand memory initialization in Rust well, so to avoid false positives an alternative method was created for BLAKE3 that initializes memory more explicitly, but more slowly. It is compiled only for the MemorySanitizer build and does not make it into the release. More elegant ways to solve this problem probably exist, but they were not found during the BLAKE3 integration.


@ -1342,12 +1342,13 @@ TCP порт для защищённого обмена данными с кли
If the policy is not set, [tmp_path](#tmp-path) is used. Otherwise `tmp_path` is ignored.
:::note "Note"
- `move_factor` is ignored.
- `keep_free_space_bytes` is ignored.
- `max_data_part_size_bytes` is ignored.
- You must have exactly one volume in that policy.
:::
:::note "Note"
- `move_factor` is ignored.
- `keep_free_space_bytes` is ignored.
- `max_data_part_size_bytes` is ignored.
- The policy must have exactly one volume containing only local disks.
:::
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}
Cache size (in bytes) for uncompressed data used by table engines of the [MergeTree](../../operations/server-configuration-parameters/settings.md) family.


@ -203,7 +203,7 @@ void LocalServer::tryInitPath()
global_context->setPath(path);
global_context->setTemporaryStorage(path + "tmp");
global_context->setTemporaryStorage(path + "tmp", "", 0);
global_context->setFlagsPath(path + "flags");
global_context->setUserFilesPath(""); // user's files are everywhere


@ -209,7 +209,7 @@ try
fs::remove(it->path());
}
else
LOG_DEBUG(log, "Skipped file in temporary path {}", it->path().string());
LOG_DEBUG(log, "Found unknown file in temporary path {}", it->path().string());
}
}
catch (...)
@ -971,7 +971,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
{
std::string tmp_path = config().getString("tmp_path", path / "tmp/");
std::string tmp_policy = config().getString("tmp_policy", "");
const VolumePtr & volume = global_context->setTemporaryStorage(tmp_path, tmp_policy);
size_t tmp_max_size = config().getUInt64("tmp_max_size", 0);
const VolumePtr & volume = global_context->setTemporaryStorage(tmp_path, tmp_policy, tmp_max_size);
for (const DiskPtr & disk : volume->getDisks())
setupTmpPath(log, disk->getPath());
}


@ -399,6 +399,9 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(UInt64, max_network_bandwidth_for_user, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running user queries. Zero means unlimited.", 0)\
M(UInt64, max_network_bandwidth_for_all_users, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running queries. Zero means unlimited.", 0) \
\
M(UInt64, max_temporary_data_on_disk_size_for_user, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running user queries. Zero means unlimited.", 0)\
M(UInt64, max_temporary_data_on_disk_size_for_query, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running queries. Zero means unlimited.", 0)\
\
M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \
M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
\


@ -241,4 +241,11 @@ DiskObjectStoragePtr DiskDecorator::createDiskObjectStorage()
return delegate->createDiskObjectStorage();
}
DiskPtr DiskDecorator::getNestedDisk() const
{
if (const auto * decorator = dynamic_cast<const DiskDecorator *>(delegate.get()))
return decorator->getNestedDisk();
return delegate;
}
}


@ -107,6 +107,8 @@ public:
bool supportsChmod() const override { return delegate->supportsChmod(); }
void chmod(const String & path, mode_t mode) override { delegate->chmod(path, mode); }
virtual DiskPtr getNestedDisk() const;
protected:
Executor & getExecutor() override;


@ -331,6 +331,20 @@ void DiskRestartProxy::getRemotePathsRecursive(
return DiskDecorator::getRemotePathsRecursive(path, paths_map);
}
DiskPtr DiskRestartProxy::getNestedDisk() const
{
DiskPtr delegate_copy;
{
ReadLock lock (mutex);
delegate_copy = delegate;
}
if (const auto * decorator = dynamic_cast<const DiskDecorator *>(delegate_copy.get()))
return decorator->getNestedDisk();
return delegate_copy;
}
void DiskRestartProxy::restart(ContextPtr context)
{
/// Speed up processing unhealthy requests.


@ -71,6 +71,8 @@ public:
void restart(ContextPtr context);
DiskPtr getNestedDisk() const override;
private:
friend class RestartAwareReadBuffer;
friend class RestartAwareWriteBuffer;


@ -74,6 +74,10 @@ public:
/// Returns valid reservation or nullptr when failure.
virtual ReservationPtr reserve(UInt64 bytes) = 0;
/// Whether this is a disk or a volume.
virtual bool isDisk() const { return false; }
virtual bool isVolume() const { return false; }
virtual ~Space() = default;
};
@ -108,6 +112,9 @@ public:
{
}
/// This is a disk.
bool isDisk() const override { return true; }
virtual DiskTransactionPtr createTransaction();
/// Root path for all files stored on the disk.


@ -55,6 +55,7 @@ public:
/// Get volume by index.
virtual VolumePtr getVolume(size_t index) const = 0;
virtual VolumePtr tryGetVolumeByName(const String & volume_name) const = 0;
virtual VolumePtr tryGetVolumeByDisk(const DiskPtr & disk_ptr) const = 0;
VolumePtr getVolumeByName(const String & volume_name) const;
/// Checks if storage policy can be replaced by another one.
virtual void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const = 0;

View File

@ -66,6 +66,9 @@ public:
virtual ReservationPtr reserve(UInt64 bytes) override = 0;
/// This is a volume.
bool isVolume() const override { return true; }
/// Volume name from config
const String & getName() const override { return name; }
virtual VolumeType getType() const = 0;

View File

@ -322,6 +322,15 @@ size_t StoragePolicy::getVolumeIndexByDisk(const DiskPtr & disk_ptr) const
}
VolumePtr StoragePolicy::tryGetVolumeByDisk(const DiskPtr & disk_ptr) const
{
auto it = volume_index_by_disk_name.find(disk_ptr->getName());
if (it == volume_index_by_disk_name.end())
return nullptr;
return getVolume(it->second);
}
void StoragePolicy::buildVolumeIndices()
{
for (size_t index = 0; index < volumes.size(); ++index)

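`tryGetVolumeByDisk` complements the existing `getVolumeIndexByDisk`: instead of failing when the disk does not belong to the policy, it returns `nullptr`, so callers can branch on the result. An illustrative sketch; the helper and the logging are hypothetical:

/// Hypothetical helper, not part of this commit.
void reportVolumeOfDisk(const StoragePolicyPtr & policy, const DiskPtr & disk, Poco::Logger * log)
{
    if (auto volume = policy->tryGetVolumeByDisk(disk))
        LOG_DEBUG(log, "Disk '{}' belongs to volume '{}'", disk->getName(), volume->getName());
    else
        LOG_DEBUG(log, "Disk '{}' is not part of this storage policy", disk->getName());
}
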
View File

@ -85,6 +85,9 @@ public:
VolumePtr tryGetVolumeByName(const String & volume_name) const override;
/// Finds a volume which contains a specified disk.
VolumePtr tryGetVolumeByDisk(const DiskPtr & disk_ptr) const override;
/// Checks if storage policy can be replaced by another one.
void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const override;

View File

@ -247,8 +247,8 @@ InputFormatPtr FormatFactory::getInput(
{ return input_getter(input, sample, row_input_format_params, format_settings); };
ParallelParsingInputFormat::Params params{
buf, sample, parser_creator, file_segmentation_engine, name, settings.max_threads, settings.min_chunk_bytes_for_parallel_parsing,
context->getApplicationType() == Context::ApplicationType::SERVER};
buf, sample, parser_creator, file_segmentation_engine, name, settings.max_threads,
settings.min_chunk_bytes_for_parallel_parsing, max_block_size, context->getApplicationType() == Context::ApplicationType::SERVER};
auto format = std::make_shared<ParallelParsingInputFormat>(params);
if (!settings.input_format_record_errors_file_path.toString().empty())
{

View File

@ -61,13 +61,16 @@ public:
using ReadCallback = std::function<void()>;
/** Fast reading data from buffer and save result to memory.
* Reads at least min_chunk_bytes and some more until the end of the chunk, depends on the format.
* Reads at least `min_bytes` and some more until the end of the chunk, depending on the format.
* If `max_rows` is non-zero, the function also stops after reading `max_rows` rows
* (even if the `min_bytes` boundary isn't reached yet).
* Used in ParallelParsingInputFormat.
*/
using FileSegmentationEngine = std::function<std::pair<bool, size_t>(
ReadBuffer & buf,
DB::Memory<Allocator<false>> & memory,
size_t min_chunk_bytes)>;
size_t min_bytes,
size_t max_rows)>;
/// This callback allows performing some additional actions after writing a single row.
/// Its initial purpose was to flush a Kafka message for each row.

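A minimal sketch of a segmentation engine matching the new signature, for a hypothetical newline-delimited format. It relies on the same `loadAtPosition` / `saveUpToPosition` helpers the real engines use and stops once at least `min_bytes` have been collected or the `max_rows` cap has been reached; it is illustrative only and not part of this commit:

/// Illustrative engine for a hypothetical newline-delimited format; not part of this commit.
std::pair<bool, size_t> fileSegmentationEngineLineDelimited(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
{
    char * pos = in.position();
    size_t number_of_rows = 0;
    bool need_more_data = true;
    while (loadAtPosition(in, memory, pos) && need_more_data)
    {
        pos = find_first_symbols<'\n'>(pos, in.buffer().end());
        if (pos == in.buffer().end())
            continue;
        ++number_of_rows;
        ++pos;
        /// Same stopping rule as the engines below: enough bytes collected, or the row cap reached.
        if ((memory.size() + static_cast<size_t>(pos - in.position()) >= min_bytes) || (max_rows && number_of_rows >= max_rows))
            need_more_data = false;
    }
    saveUpToPosition(in, memory, pos);
    return {loadAtPosition(in, memory, pos), number_of_rows};
}
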
View File

@ -33,7 +33,7 @@ namespace JSONUtils
template <const char opening_bracket, const char closing_bracket>
static std::pair<bool, size_t>
fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows)
fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows)
{
skipWhitespaceIfAny(in);
@ -41,14 +41,17 @@ namespace JSONUtils
size_t balance = 0;
bool quotes = false;
size_t number_of_rows = 0;
bool need_more_data = true;
while (loadAtPosition(in, memory, pos)
&& (balance || memory.size() + static_cast<size_t>(pos - in.position()) < min_chunk_size || number_of_rows < min_rows))
if (max_rows && (max_rows < min_rows))
max_rows = min_rows;
while (loadAtPosition(in, memory, pos) && need_more_data)
{
const auto current_object_size = memory.size() + static_cast<size_t>(pos - in.position());
if (min_chunk_size != 0 && current_object_size > 10 * min_chunk_size)
if (min_bytes != 0 && current_object_size > 10 * min_bytes)
throw ParsingException(
"Size of JSON object is extremely large. Expected not greater than " + std::to_string(min_chunk_size)
"Size of JSON object is extremely large. Expected not greater than " + std::to_string(min_bytes)
+ " bytes, but current is " + std::to_string(current_object_size)
+ " bytes per row. Increase the value of the setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually; most likely the JSON is malformed",
ErrorCodes::INCORRECT_DATA);
@ -106,7 +109,12 @@ namespace JSONUtils
}
if (balance == 0)
{
++number_of_rows;
if ((number_of_rows >= min_rows)
&& ((memory.size() + static_cast<size_t>(pos - in.position()) >= min_bytes) || (number_of_rows == max_rows)))
need_more_data = false;
}
}
}
@ -118,7 +126,7 @@ namespace JSONUtils
static String readJSONEachRowLineIntoStringImpl(ReadBuffer & in)
{
Memory memory;
fileSegmentationEngineJSONEachRowImpl<opening_bracket, closing_bracket>(in, memory, 0, 1);
fileSegmentationEngineJSONEachRowImpl<opening_bracket, closing_bracket>(in, memory, 0, 1, 1);
return String(memory.data(), memory.size());
}
@ -297,15 +305,15 @@ namespace JSONUtils
return data_types;
}
std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
{
return fileSegmentationEngineJSONEachRowImpl<'{', '}'>(in, memory, min_chunk_size, 1);
return fileSegmentationEngineJSONEachRowImpl<'{', '}'>(in, memory, min_bytes, 1, max_rows);
}
std::pair<bool, size_t>
fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows)
fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows)
{
return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_chunk_size, min_rows);
return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_bytes, min_rows, max_rows);
}
struct JSONEachRowFieldsExtractor

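The stopping rule that the JSON, CSV and TSV engines share after this change can be read as a single predicate. The helper below is illustrative only (in the actual code `max_rows` is first clamped to be at least `min_rows`):

/// Illustrative only: the common "cut the chunk here" condition used by the segmentation engines.
static bool enoughDataForChunk(size_t rows, size_t bytes, size_t min_rows, size_t min_bytes, size_t max_rows)
{
    /// Rows are accumulated until at least min_rows are collected; after that the chunk ends
    /// either once min_bytes bytes were gathered or once the max_rows cap is hit.
    return (rows >= min_rows) && ((bytes >= min_bytes) || (rows == max_rows));
}
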
View File

@ -15,9 +15,8 @@ namespace DB
namespace JSONUtils
{
std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size);
std::pair<bool, size_t>
fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows);
std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows);
std::pair<bool, size_t> fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows);
/// Parse JSON from a string and convert its type to a ClickHouse type. Make the result type always Nullable.
/// JSON array with different nested types is treated as Tuple.

View File

@ -237,6 +237,7 @@ Block NativeReader::read()
else
tmp_res.insert({col.type->createColumn()->cloneResized(rows), col.type, col.name});
}
tmp_res.info = res.info;
res.swap(tmp_res);
}

View File

@ -1,4 +1,4 @@
#include <Formats/TemporaryFileStream.h>
#include <Formats/TemporaryFileStreamLegacy.h>
#include <Formats/NativeReader.h>
#include <Formats/NativeWriter.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
@ -12,20 +12,20 @@ namespace DB
{
/// To read the data that was flushed into the temporary data file.
TemporaryFileStream::TemporaryFileStream(const std::string & path)
TemporaryFileStreamLegacy::TemporaryFileStreamLegacy(const std::string & path)
: file_in(path)
, compressed_in(file_in)
, block_in(std::make_unique<NativeReader>(compressed_in, DBMS_TCP_PROTOCOL_VERSION))
{}
TemporaryFileStream::TemporaryFileStream(const std::string & path, const Block & header_)
TemporaryFileStreamLegacy::TemporaryFileStreamLegacy(const std::string & path, const Block & header_)
: file_in(path)
, compressed_in(file_in)
, block_in(std::make_unique<NativeReader>(compressed_in, header_, 0))
{}
/// Flush data from input stream into file for future reading
TemporaryFileStream::Stat TemporaryFileStream::write(const std::string & path, const Block & header, QueryPipelineBuilder builder, const std::string & codec)
TemporaryFileStreamLegacy::Stat TemporaryFileStreamLegacy::write(const std::string & path, const Block & header, QueryPipelineBuilder builder, const std::string & codec)
{
WriteBufferFromFile file_buf(path);
CompressedWriteBuffer compressed_buf(file_buf, CompressionCodecFactory::instance().get(codec, {}));

View File

@ -9,8 +9,10 @@
namespace DB
{
/// Used only in MergeJoin
/// TODO: use `TemporaryDataOnDisk` instead
/// To read the data that was flushed into the temporary data file.
struct TemporaryFileStream
struct TemporaryFileStreamLegacy
{
struct Stat
{
@ -22,8 +24,8 @@ struct TemporaryFileStream
CompressedReadBuffer compressed_in;
std::unique_ptr<NativeReader> block_in;
explicit TemporaryFileStream(const std::string & path);
TemporaryFileStream(const std::string & path, const Block & header_);
explicit TemporaryFileStreamLegacy(const std::string & path);
TemporaryFileStreamLegacy(const std::string & path, const Block & header_);
/// Flush data from input stream into file for future reading
static Stat write(const std::string & path, const Block & header, QueryPipelineBuilder builder, const std::string & codec);

View File

@ -1766,12 +1766,12 @@ public:
return {true, is_constant_positive, true};
}
}
return {false, true, false};
return {false, true, false, false};
}
// For simplicity, we treat every single value interval as positive monotonic.
if (applyVisitor(FieldVisitorAccurateEquals(), left_point, right_point))
return {true, true, false};
return {true, true, false, false};
if (name_view == "minus" || name_view == "plus")
{
@ -1797,18 +1797,18 @@ public:
// Check if there is an overflow
if (applyVisitor(FieldVisitorAccurateLess(), left_point, right_point)
== applyVisitor(FieldVisitorAccurateLess(), transform(left_point), transform(right_point)))
return {true, true, false};
return {true, true, false, true};
else
return {false, true, false};
return {false, true, false, false};
}
else
{
// Check if there is an overflow
if (applyVisitor(FieldVisitorAccurateLess(), left_point, right_point)
!= applyVisitor(FieldVisitorAccurateLess(), transform(left_point), transform(right_point)))
return {true, false, false};
return {true, false, false, true};
else
return {false, false, false};
return {false, false, false, false};
}
}
// variable +|- constant
@ -1829,31 +1829,33 @@ public:
// Check if there is an overflow
if (applyVisitor(FieldVisitorAccurateLess(), left_point, right_point)
== applyVisitor(FieldVisitorAccurateLess(), transform(left_point), transform(right_point)))
return {true, true, false};
return {true, true, false, true};
else
return {false, true, false};
return {false, true, false, false};
}
}
if (name_view == "divide" || name_view == "intDiv")
{
bool is_strict = name_view == "divide";
// const / variable
if (left.column && isColumnConst(*left.column))
{
auto constant = (*left.column)[0];
if (applyVisitor(FieldVisitorAccurateEquals(), constant, Field(0)))
return {true, true, false}; // 0 / 0 is undefined, thus it's not always monotonic
return {true, true, false, false}; // 0 / 0 is undefined, thus it's not always monotonic
bool is_constant_positive = applyVisitor(FieldVisitorAccurateLess(), Field(0), constant);
if (applyVisitor(FieldVisitorAccurateLess(), left_point, Field(0))
&& applyVisitor(FieldVisitorAccurateLess(), right_point, Field(0)))
{
return {true, is_constant_positive, false};
return {true, is_constant_positive, false, is_strict};
}
else if (
applyVisitor(FieldVisitorAccurateLess(), Field(0), left_point)
&& applyVisitor(FieldVisitorAccurateLess(), Field(0), right_point))
{
return {true, !is_constant_positive, false};
return {true, !is_constant_positive, false, is_strict};
}
}
// variable / constant
@ -1861,11 +1863,11 @@ public:
{
auto constant = (*right.column)[0];
if (applyVisitor(FieldVisitorAccurateEquals(), constant, Field(0)))
return {false, true, false}; // variable / 0 is undefined, let's treat it as non-monotonic
return {false, true, false, false}; // variable / 0 is undefined, let's treat it as non-monotonic
bool is_constant_positive = applyVisitor(FieldVisitorAccurateLess(), Field(0), constant);
// division is saturated to `inf`, thus it doesn't have overflow issues.
return {true, is_constant_positive, true};
return {true, is_constant_positive, true, is_strict};
}
}
return {false, true, false};

View File

@ -265,9 +265,10 @@ public:
/// The property of monotonicity for a certain range.
struct Monotonicity
{
bool is_monotonic = false; /// Is the function monotonous (non-decreasing or non-increasing).
bool is_positive = true; /// true if the function is non-decreasing, false if non-increasing. If is_monotonic = false, then it does not matter.
bool is_monotonic = false; /// Is the function monotonous (non-decreasing or non-increasing).
bool is_positive = true; /// true if the function is non-decreasing, false if non-increasing. If is_monotonic = false, then it does not matter.
bool is_always_monotonic = false; /// true if the function is monotonic on the whole input range.
bool is_strict = false; /// true if the function is strictly decreasing or increasing.
};
/** Get information about monotonicity on a range of values. Call only if hasInformationAboutMonotonicity.

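The new `is_strict` flag records whether the function maps distinct arguments to distinct values. Illustrative only, mirroring what the changed code above returns: `negate` is strict, while `intDiv` by a positive constant is monotonic on the whole range but not strict (for example, dividing by 10 maps 10..19 to the same value):

/// Illustrative values only, matching the diffs above.
IFunction::Monotonicity negate_mono = { .is_monotonic = true, .is_positive = false, .is_strict = true };
IFunction::Monotonicity int_div_mono = { .is_monotonic = true, .is_positive = true, .is_always_monotonic = true, .is_strict = false };
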
View File

@ -46,7 +46,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameAbs>
if ((left_float < 0 && right_float > 0) || (left_float > 0 && right_float < 0))
return {};
return { .is_monotonic = true, .is_positive = left_float > 0 };
return { .is_monotonic = true, .is_positive = left_float > 0, .is_strict = true, };
}
};

View File

@ -139,7 +139,7 @@ namespace DB
Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override
{
return { .is_monotonic = true, .is_always_monotonic = true };
return { .is_monotonic = true, .is_always_monotonic = true, .is_strict = true, };
}
private:

View File

@ -55,7 +55,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameIntExp10>
if (left_float < 0 || right_float > 19)
return {};
return { .is_monotonic = true };
return { .is_monotonic = true, .is_strict = true };
}
};

View File

@ -58,7 +58,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameIntExp2>
if (left_float < 0 || right_float > 63)
return {};
return { .is_monotonic = true };
return { .is_monotonic = true, .is_strict = true, };
}
};

View File

@ -42,7 +42,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameNegate>
static bool has() { return true; }
static IFunction::Monotonicity get(const Field &, const Field &)
{
return { .is_monotonic = true, .is_positive = false };
return { .is_monotonic = true, .is_positive = false, .is_strict = true };
}
};

View File

@ -157,7 +157,7 @@ namespace DB
Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override
{
return { .is_monotonic = true, .is_always_monotonic = true };
return { .is_monotonic = true, .is_always_monotonic = true, .is_strict = true };
}
private:

View File

@ -67,7 +67,7 @@ public:
Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const override
{
const bool b = is_constant_timezone;
return { .is_monotonic = b, .is_positive = b, .is_always_monotonic = b };
return { .is_monotonic = b, .is_positive = b, .is_always_monotonic = b, .is_strict = b };
}
private:

View File

@ -35,6 +35,7 @@
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Core/ProtocolDefines.h>
#include <Disks/TemporaryFileOnDisk.h>
#include <Interpreters/TemporaryDataOnDisk.h>
#include <Parsers/ASTSelectQuery.h>
@ -59,6 +60,20 @@ namespace CurrentMetrics
extern const Metric TemporaryFilesForAggregation;
}
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_AGGREGATED_DATA_VARIANT;
extern const int TOO_MANY_ROWS;
extern const int EMPTY_DATA_PASSED;
extern const int CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS;
extern const int LOGICAL_ERROR;
}
}
namespace
{
/** Collects observed HashMap-s sizes to avoid redundant intermediate resizes.
@ -311,17 +326,6 @@ size_t getMinBytesForPrefetch()
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_AGGREGATED_DATA_VARIANT;
extern const int NOT_ENOUGH_SPACE;
extern const int TOO_MANY_ROWS;
extern const int EMPTY_DATA_PASSED;
extern const int CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS;
extern const int LOGICAL_ERROR;
}
AggregatedDataVariants::~AggregatedDataVariants()
{
if (aggregator && !aggregator->all_aggregates_has_trivial_destructor)
@ -566,6 +570,7 @@ Aggregator::Aggregator(const Block & header_, const Params & params_)
: header(header_)
, keys_positions(calculateKeysPositions(header, params_))
, params(params_)
, tmp_data(params.tmp_data_scope ? std::make_unique<TemporaryDataOnDisk>(params.tmp_data_scope) : nullptr)
, min_bytes_for_prefetch(getMinBytesForPrefetch())
{
/// Use query-level memory tracker
@ -1562,30 +1567,28 @@ bool Aggregator::executeOnBlock(Columns columns,
void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, size_t max_temp_file_size) const
{
if (!tmp_data)
throw Exception("Cannot write to temporary file because temporary file is not initialized", ErrorCodes::LOGICAL_ERROR);
Stopwatch watch;
size_t rows = data_variants.size();
auto file = createTempFile(max_temp_file_size);
const auto & path = file->path();
WriteBufferFromFile file_buf(path);
CompressedWriteBuffer compressed_buf(file_buf);
NativeWriter block_out(compressed_buf, DBMS_TCP_PROTOCOL_VERSION, getHeader(false));
LOG_DEBUG(log, "Writing part of aggregation data into temporary file {}", path);
auto & out_stream = tmp_data->createStream(getHeader(false), CurrentMetrics::TemporaryFilesForAggregation, max_temp_file_size);
ProfileEvents::increment(ProfileEvents::ExternalAggregationWritePart);
LOG_DEBUG(log, "Writing part of aggregation data into temporary file {}", out_stream.path());
/// Flush only two-level data and possibly overflow data.
#define M(NAME) \
else if (data_variants.type == AggregatedDataVariants::Type::NAME) \
writeToTemporaryFileImpl(data_variants, *data_variants.NAME, block_out);
writeToTemporaryFileImpl(data_variants, *data_variants.NAME, out_stream);
if (false) {} // NOLINT
APPLY_FOR_VARIANTS_TWO_LEVEL(M)
#undef M
else
throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
throw Exception("Unknown aggregated data variant", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
/// NOTE Instead of freeing up memory and creating new hash tables and arenas, you can re-use the old ones.
data_variants.init(data_variants.type);
@ -1598,62 +1601,32 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, si
data_variants.without_key = place;
}
block_out.flush();
compressed_buf.next();
file_buf.next();
auto stat = out_stream.finishWriting();
ProfileEvents::increment(ProfileEvents::ExternalAggregationCompressedBytes, stat.compressed_size);
ProfileEvents::increment(ProfileEvents::ExternalAggregationUncompressedBytes, stat.uncompressed_size);
ProfileEvents::increment(ProfileEvents::ExternalProcessingCompressedBytesTotal, stat.compressed_size);
ProfileEvents::increment(ProfileEvents::ExternalProcessingUncompressedBytesTotal, stat.uncompressed_size);
double elapsed_seconds = watch.elapsedSeconds();
size_t compressed_bytes = file_buf.count();
size_t uncompressed_bytes = compressed_buf.count();
{
std::lock_guard lock(temporary_files.mutex);
temporary_files.files.emplace_back(std::move(file));
temporary_files.sum_size_uncompressed += uncompressed_bytes;
temporary_files.sum_size_compressed += compressed_bytes;
}
ProfileEvents::increment(ProfileEvents::ExternalAggregationCompressedBytes, compressed_bytes);
ProfileEvents::increment(ProfileEvents::ExternalAggregationUncompressedBytes, uncompressed_bytes);
ProfileEvents::increment(ProfileEvents::ExternalProcessingCompressedBytesTotal, compressed_bytes);
ProfileEvents::increment(ProfileEvents::ExternalProcessingUncompressedBytesTotal, uncompressed_bytes);
double compressed_size = stat.compressed_size;
double uncompressed_size = stat.uncompressed_size;
LOG_DEBUG(log,
"Written part in {:.3f} sec., {} rows, {} uncompressed, {} compressed,"
" {:.3f} uncompressed bytes per row, {:.3f} compressed bytes per row, compression rate: {:.3f}"
" ({:.3f} rows/sec., {}/sec. uncompressed, {}/sec. compressed)",
elapsed_seconds,
rows,
ReadableSize(uncompressed_bytes),
ReadableSize(compressed_bytes),
static_cast<double>(uncompressed_bytes) / rows,
static_cast<double>(compressed_bytes) / rows,
static_cast<double>(uncompressed_bytes) / compressed_bytes,
ReadableSize(uncompressed_size),
ReadableSize(compressed_size),
static_cast<double>(uncompressed_size) / rows,
static_cast<double>(compressed_size) / rows,
static_cast<double>(uncompressed_size) / compressed_size,
static_cast<double>(rows) / elapsed_seconds,
ReadableSize(static_cast<double>(uncompressed_bytes) / elapsed_seconds),
ReadableSize(static_cast<double>(compressed_bytes) / elapsed_seconds));
ReadableSize(static_cast<double>(uncompressed_size) / elapsed_seconds),
ReadableSize(static_cast<double>(compressed_size) / elapsed_seconds));
}
TemporaryFileOnDiskHolder Aggregator::createTempFile(size_t max_temp_file_size) const
{
auto file = std::make_unique<TemporaryFileOnDisk>(params.tmp_volume->getDisk(), CurrentMetrics::TemporaryFilesForAggregation);
// enoughSpaceInDirectory() is not enough to make it right, since
// another process (or another thread of aggregator) can consume all
// space.
//
// But true reservation (IVolume::reserve()) cannot be used here since
// current_memory_usage does not takes compression into account and
// will reserve way more that actually will be used.
//
// Hence let's do a simple check.
if (max_temp_file_size > 0 && !enoughSpaceInDirectory(file->getPath(), max_temp_file_size))
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for external aggregation in '{}'", file->path());
return file;
}
template <typename Method>
Block Aggregator::convertOneBucketToBlock(
AggregatedDataVariants & data_variants,
@ -1703,7 +1676,7 @@ template <typename Method>
void Aggregator::writeToTemporaryFileImpl(
AggregatedDataVariants & data_variants,
Method & method,
NativeWriter & out) const
TemporaryFileStream & out) const
{
size_t max_temporary_block_size_rows = 0;
size_t max_temporary_block_size_bytes = 0;

View File

@ -29,6 +29,7 @@
#include <Interpreters/AggregateDescription.h>
#include <Interpreters/AggregationCommon.h>
#include <Interpreters/JIT/compileFunction.h>
#include <Interpreters/TemporaryDataOnDisk.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
@ -925,7 +926,7 @@ public:
/// Return empty result when aggregating without keys on empty set.
bool empty_result_for_aggregation_by_empty_set;
VolumePtr tmp_volume;
TemporaryDataOnDiskScopePtr tmp_data_scope;
/// Settings is used to determine cache size. No threads are created.
size_t max_threads;
@ -970,7 +971,7 @@ public:
size_t group_by_two_level_threshold_bytes_,
size_t max_bytes_before_external_group_by_,
bool empty_result_for_aggregation_by_empty_set_,
VolumePtr tmp_volume_,
TemporaryDataOnDiskScopePtr tmp_data_scope_,
size_t max_threads_,
size_t min_free_disk_space_,
bool compile_aggregate_expressions_,
@ -990,7 +991,7 @@ public:
, group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_)
, max_bytes_before_external_group_by(max_bytes_before_external_group_by_)
, empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_)
, tmp_volume(tmp_volume_)
, tmp_data_scope(std::move(tmp_data_scope_))
, max_threads(max_threads_)
, min_free_disk_space(min_free_disk_space_)
, compile_aggregate_expressions(compile_aggregate_expressions_)
@ -1071,25 +1072,9 @@ public:
/// For external aggregation.
void writeToTemporaryFile(AggregatedDataVariants & data_variants, size_t max_temp_file_size = 0) const;
TemporaryFileOnDiskHolder createTempFile(size_t max_temp_file_size) const;
bool hasTemporaryData() const { return tmp_data && !tmp_data->empty(); }
bool hasTemporaryFiles() const { return !temporary_files.empty(); }
struct TemporaryFiles
{
std::vector<TemporaryFileOnDiskHolder> files;
size_t sum_size_uncompressed = 0;
size_t sum_size_compressed = 0;
mutable std::mutex mutex;
bool empty() const
{
std::lock_guard lock(mutex);
return files.empty();
}
};
const TemporaryFiles & getTemporaryFiles() const { return temporary_files; }
const TemporaryDataOnDisk & getTemporaryData() const { return *tmp_data; }
/// Get data structure of the result.
Block getHeader(bool final) const;
@ -1148,7 +1133,7 @@ private:
Poco::Logger * log = &Poco::Logger::get("Aggregator");
/// For external aggregation.
mutable TemporaryFiles temporary_files;
TemporaryDataOnDiskPtr tmp_data;
size_t min_bytes_for_prefetch = 0;
@ -1251,7 +1236,7 @@ private:
void writeToTemporaryFileImpl(
AggregatedDataVariants & data_variants,
Method & method,
NativeWriter & out) const;
TemporaryFileStream & out) const;
/// Merge NULL key data from hash table `src` into `dst`.
template <typename Method, typename Table>

View File

@ -30,6 +30,7 @@
#include <Storages/CompressionCodecSelector.h>
#include <Storages/StorageS3Settings.h>
#include <Disks/DiskLocal.h>
#include <Disks/DiskDecorator.h>
#include <Disks/ObjectStorages/IObjectStorage.h>
#include <Disks/IO/ThreadPoolRemoteFSReader.h>
#include <Disks/IO/ThreadPoolReader.h>
@ -37,6 +38,7 @@
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/ActionLocksManager.h>
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
#include <Interpreters/TemporaryDataOnDisk.h>
#include <Core/Settings.h>
#include <Core/SettingsQuirks.h>
#include <Access/AccessControl.h>
@ -188,7 +190,7 @@ struct ContextSharedPart : boost::noncopyable
ConfigurationPtr config; /// Global configuration settings.
String tmp_path; /// Path to the temporary files that occur when processing the request.
mutable VolumePtr tmp_volume; /// Volume for the the temporary files that occur when processing the request.
TemporaryDataOnDiskScopePtr temp_data_on_disk; /// Temporary files that occur when processing the request are accounted for here.
mutable std::unique_ptr<EmbeddedDictionaries> embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization.
mutable std::unique_ptr<ExternalDictionariesLoader> external_dictionaries_loader;
@ -681,10 +683,27 @@ Strings Context::getWarnings() const
return common_warnings;
}
/// TODO: remove, use `getTempDataOnDisk`
VolumePtr Context::getTemporaryVolume() const
{
auto lock = getLock();
return shared->tmp_volume;
if (shared->temp_data_on_disk)
return shared->temp_data_on_disk->getVolume();
return nullptr;
}
TemporaryDataOnDiskScopePtr Context::getTempDataOnDisk() const
{
auto lock = getLock();
if (this->temp_data_on_disk)
return this->temp_data_on_disk;
return shared->temp_data_on_disk;
}
void Context::setTempDataOnDisk(TemporaryDataOnDiskScopePtr temp_data_on_disk_)
{
auto lock = getLock();
this->temp_data_on_disk = std::move(temp_data_on_disk_);
}
void Context::setPath(const String & path)
@ -693,7 +712,7 @@ void Context::setPath(const String & path)
shared->path = path;
if (shared->tmp_path.empty() && !shared->tmp_volume)
if (shared->tmp_path.empty() && !shared->temp_data_on_disk)
shared->tmp_path = shared->path + "tmp/";
if (shared->flags_path.empty())
@ -712,9 +731,10 @@ void Context::setPath(const String & path)
shared->user_defined_path = shared->path + "user_defined/";
}
VolumePtr Context::setTemporaryStorage(const String & path, const String & policy_name)
VolumePtr Context::setTemporaryStorage(const String & path, const String & policy_name, size_t max_size)
{
std::lock_guard lock(shared->storage_policies_mutex);
VolumePtr volume;
if (policy_name.empty())
{
@ -723,21 +743,41 @@ VolumePtr Context::setTemporaryStorage(const String & path, const String & polic
shared->tmp_path += '/';
auto disk = std::make_shared<DiskLocal>("_tmp_default", shared->tmp_path, 0);
shared->tmp_volume = std::make_shared<SingleDiskVolume>("_tmp_default", disk, 0);
volume = std::make_shared<SingleDiskVolume>("_tmp_default", disk, 0);
}
else
{
StoragePolicyPtr tmp_policy = getStoragePolicySelector(lock)->get(policy_name);
if (tmp_policy->getVolumes().size() != 1)
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
"Policy '{} is used temporary files, such policy should have exactly one volume", policy_name);
shared->tmp_volume = tmp_policy->getVolume(0);
"Policy '{}' is used for temporary files; such a policy should have exactly one volume", policy_name);
volume = tmp_policy->getVolume(0);
}
if (shared->tmp_volume->getDisks().empty())
if (volume->getDisks().empty())
throw Exception("No disks volume for temporary files", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
return shared->tmp_volume;
for (const auto & disk : volume->getDisks())
{
if (!disk)
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Temporary disk is null");
/// Check that underlying disk is local (can be wrapped in decorator)
DiskPtr disk_ptr = disk;
if (const auto * disk_decorator = dynamic_cast<const DiskDecorator *>(disk_ptr.get()))
disk_ptr = disk_decorator->getNestedDisk();
if (dynamic_cast<const DiskLocal *>(disk_ptr.get()) == nullptr)
{
const auto * disk_raw_ptr = disk_ptr.get();
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
"Disk '{}' ({}) is not local and can't be used for temporary files",
disk_ptr->getName(), typeid(*disk_raw_ptr).name());
}
}
shared->temp_data_on_disk = std::make_shared<TemporaryDataOnDiskScope>(volume, max_size);
return volume;
}
void Context::setFlagsPath(const String & path)
@ -2897,14 +2937,13 @@ void Context::shutdown()
}
}
// Special volumes might also use disks that require shutdown.
if (shared->tmp_volume)
/// Special volumes might also use disks that require shutdown.
auto & tmp_data = shared->temp_data_on_disk;
if (tmp_data && tmp_data->getVolume())
{
auto & disks = shared->tmp_volume->getDisks();
auto & disks = tmp_data->getVolume()->getDisks();
for (auto & disk : disks)
{
disk->shutdown();
}
}
shared->shutdown();

View File
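The check added to `setTemporaryStorage` unwraps decorator layers to make sure temporary data ends up on a local disk. Since `DiskDecorator::getNestedDisk()` already recurses through stacked decorators, a single unwrap is enough; a sketch of the same check as a standalone helper (hypothetical, not part of this commit):

/// Hypothetical helper: is the disk, once decorators are stripped, a DiskLocal?
bool isBackedByLocalDisk(const DiskPtr & disk)
{
    DiskPtr inner = disk;
    if (const auto * decorator = dynamic_cast<const DiskDecorator *>(inner.get()))
        inner = decorator->getNestedDisk();   /// getNestedDisk() recurses through nested decorators
    return dynamic_cast<const DiskLocal *>(inner.get()) != nullptr;
}
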

@ -161,6 +161,8 @@ using ReadTaskCallback = std::function<String()>;
using MergeTreeReadTaskCallback = std::function<std::optional<PartitionReadResponse>(PartitionReadRequest)>;
class TemporaryDataOnDiskScope;
using TemporaryDataOnDiskScopePtr = std::shared_ptr<TemporaryDataOnDiskScope>;
#if USE_ROCKSDB
class MergeTreeMetadataCache;
@ -362,6 +364,8 @@ private:
/// A flag, used to mark if reader needs to apply deleted rows mask.
bool apply_deleted_mask = true;
/// Temporary data for query execution accounting.
TemporaryDataOnDiskScopePtr temp_data_on_disk;
public:
/// Some counters for current query execution.
/// Most of them are workarounds and should be removed in the future.
@ -435,7 +439,10 @@ public:
/// A list of warnings about server configuration to place in `system.warnings` table.
Strings getWarnings() const;
VolumePtr getTemporaryVolume() const;
VolumePtr getTemporaryVolume() const; /// TODO: remove, use `getTempDataOnDisk`
TemporaryDataOnDiskScopePtr getTempDataOnDisk() const;
void setTempDataOnDisk(TemporaryDataOnDiskScopePtr temp_data_on_disk_);
void setPath(const String & path);
void setFlagsPath(const String & path);
@ -446,7 +453,7 @@ public:
void addWarningMessage(const String & msg) const;
VolumePtr setTemporaryStorage(const String & path, const String & policy_name = "");
VolumePtr setTemporaryStorage(const String & path, const String & policy_name, size_t max_size);
using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;

View File

@ -1453,7 +1453,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
settings.max_bytes_before_remerge_sort,
settings.remerge_sort_lowered_memory_bytes_ratio,
settings.max_bytes_before_external_sort,
this->context->getTemporaryVolume(),
this->context->getTempDataOnDisk(),
settings.min_free_disk_space_for_temporary_data,
settings.optimize_sorting_by_input_stream_properties);
sorting_step->setStepDescription(fmt::format("Sort {} before JOIN", join_pos));
@ -2354,7 +2354,7 @@ static Aggregator::Params getAggregatorParams(
settings.empty_result_for_aggregation_by_empty_set
|| (settings.empty_result_for_aggregation_by_constant_keys_on_empty_set && keys.empty()
&& query_analyzer.hasConstAggregationKeys()),
context.getTemporaryVolume(),
context.getTempDataOnDisk(),
settings.max_threads,
settings.min_free_disk_space_for_temporary_data,
settings.compile_aggregate_expressions,
@ -2616,7 +2616,7 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan)
settings.max_bytes_before_remerge_sort,
settings.remerge_sort_lowered_memory_bytes_ratio,
settings.max_bytes_before_external_sort,
context->getTemporaryVolume(),
context->getTempDataOnDisk(),
settings.min_free_disk_space_for_temporary_data,
settings.optimize_sorting_by_input_stream_properties);
sorting_step->setStepDescription("Sorting for window '" + window.window_name + "'");
@ -2675,7 +2675,7 @@ void InterpreterSelectQuery::executeOrder(QueryPlan & query_plan, InputOrderInfo
settings.max_bytes_before_remerge_sort,
settings.remerge_sort_lowered_memory_bytes_ratio,
settings.max_bytes_before_external_sort,
context->getTemporaryVolume(),
context->getTempDataOnDisk(),
settings.min_free_disk_space_for_temporary_data,
settings.optimize_sorting_by_input_stream_properties);

View File

@ -4,12 +4,13 @@
#include <Columns/ColumnLowCardinality.h>
#include <Core/SortCursor.h>
#include <Formats/TemporaryFileStream.h>
#include <Formats/TemporaryFileStreamLegacy.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/MergeJoin.h>
#include <Interpreters/TableJoin.h>
#include <Interpreters/JoinUtils.h>
#include <Interpreters/TemporaryDataOnDisk.h>
#include <Interpreters/sortBlock.h>
#include <Processors/Sources/BlocksListSource.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
@ -1032,7 +1033,7 @@ std::shared_ptr<Block> MergeJoin::loadRightBlock(size_t pos) const
{
auto load_func = [&]() -> std::shared_ptr<Block>
{
TemporaryFileStream input(flushed_right_blocks[pos]->path(), materializeBlock(right_sample_block));
TemporaryFileStreamLegacy input(flushed_right_blocks[pos]->path(), materializeBlock(right_sample_block));
return std::make_shared<Block>(input.block_in->read());
};

View File

@ -69,7 +69,7 @@ static bool isUnlimitedQuery(const IAST * ast)
}
ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * ast, ContextPtr query_context)
ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr query_context)
{
EntryPtr res;
@ -198,7 +198,11 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as
auto user_process_list_it = user_to_queries.find(client_info.current_user);
if (user_process_list_it == user_to_queries.end())
user_process_list_it = user_to_queries.emplace(client_info.current_user, this).first;
{
user_process_list_it = user_to_queries.emplace(std::piecewise_construct,
std::forward_as_tuple(client_info.current_user),
std::forward_as_tuple(query_context->getGlobalContext(), this)).first;
}
ProcessListForUser & user_process_list = user_process_list_it->second;
/// Actualize thread group info
@ -208,6 +212,11 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as
std::lock_guard lock_thread_group(thread_group->mutex);
thread_group->performance_counters.setParent(&user_process_list.user_performance_counters);
thread_group->memory_tracker.setParent(&user_process_list.user_memory_tracker);
if (user_process_list.user_temp_data_on_disk)
{
query_context->setTempDataOnDisk(std::make_shared<TemporaryDataOnDiskScope>(
user_process_list.user_temp_data_on_disk, settings.max_temporary_data_on_disk_size_for_query));
}
thread_group->query = query_;
thread_group->one_line_query = toOneLineQuery(query_);
thread_group->normalized_query_hash = normalizedQueryHash<false>(query_);
@ -556,9 +565,19 @@ ProcessList::Info ProcessList::getInfo(bool get_thread_list, bool get_profile_ev
ProcessListForUser::ProcessListForUser(ProcessList * global_process_list)
: ProcessListForUser(nullptr, global_process_list)
{}
ProcessListForUser::ProcessListForUser(ContextPtr global_context, ProcessList * global_process_list)
: user_overcommit_tracker(global_process_list, this)
{
user_memory_tracker.setOvercommitTracker(&user_overcommit_tracker);
if (global_context)
{
size_t size_limit = global_context->getSettingsRef().max_temporary_data_on_disk_size_for_user;
user_temp_data_on_disk = std::make_shared<TemporaryDataOnDiskScope>(global_context->getTempDataOnDisk(), size_limit);
}
}

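With this change the temporary-data scopes form a chain: the global scope (limited by the server's `tmp_max_size`) is wrapped by a per-user scope (`max_temporary_data_on_disk_size_for_user`), which in turn is wrapped by a per-query scope (`max_temporary_data_on_disk_size_for_query`). A sketch of the chain the code above builds; the snippet is illustrative and collapses what ProcessListForUser and ProcessList::insert do separately (a limit of 0 means unlimited):

/// Illustrative only: nested accounting scopes, each sharing the volume of its parent.
auto global_scope = global_context->getTempDataOnDisk();
auto user_scope = std::make_shared<TemporaryDataOnDiskScope>(global_scope, settings.max_temporary_data_on_disk_size_for_user);
auto query_scope = std::make_shared<TemporaryDataOnDiskScope>(user_scope, settings.max_temporary_data_on_disk_size_for_query);
query_context->setTempDataOnDisk(query_scope);
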
View File

@ -5,6 +5,8 @@
#include <Interpreters/CancellationCode.h>
#include <Interpreters/ClientInfo.h>
#include <Interpreters/QueryPriorities.h>
#include <Interpreters/TemporaryDataOnDisk.h>
#include <Interpreters/Context.h>
#include <QueryPipeline/BlockIO.h>
#include <QueryPipeline/ExecutionSpeedLimits.h>
#include <Storages/IStorage_fwd.h>
@ -236,6 +238,8 @@ struct ProcessListForUser
{
explicit ProcessListForUser(ProcessList * global_process_list);
ProcessListForUser(ContextPtr global_context, ProcessList * global_process_list);
/// query_id -> ProcessListElement(s). There can be multiple queries with the same query_id as long as all queries except one are cancelled.
using QueryToElement = std::unordered_map<String, QueryStatus *>;
QueryToElement queries;
@ -244,6 +248,8 @@ struct ProcessListForUser
/// Limit and counter for memory of all simultaneously running queries of single user.
MemoryTracker user_memory_tracker{VariableContext::User};
TemporaryDataOnDiskScopePtr user_temp_data_on_disk;
UserOvercommitTracker user_overcommit_tracker;
/// Count network usage for all simultaneously running queries of single user.
@ -257,6 +263,7 @@ struct ProcessListForUser
/// Clears network bandwidth Throttler, so it will not count periods of inactivity.
void resetTrackers()
{
/// TODO: should we drop user_temp_data_on_disk here?
user_memory_tracker.reset();
if (user_throttler)
user_throttler.reset();
@ -374,7 +381,7 @@ public:
* If timeout is passed - throw an exception.
* Don't count KILL QUERY queries.
*/
EntryPtr insert(const String & query_, const IAST * ast, ContextPtr query_context);
EntryPtr insert(const String & query_, const IAST * ast, ContextMutablePtr query_context);
/// Number of currently executing queries.
size_t size() const { return processes.size(); }

View File

@ -5,7 +5,7 @@
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Merges/MergingSortedTransform.h>
#include <Processors/Sources/TemporaryFileLazySource.h>
#include <Formats/TemporaryFileStream.h>
#include <Formats/TemporaryFileStreamLegacy.h>
#include <Disks/IVolume.h>
#include <Disks/TemporaryFileOnDisk.h>
@ -39,7 +39,7 @@ namespace
TemporaryFileOnDiskHolder flushToFile(const DiskPtr & disk, const Block & header, QueryPipelineBuilder pipeline, const String & codec)
{
auto tmp_file = std::make_unique<TemporaryFileOnDisk>(disk, CurrentMetrics::TemporaryFilesForJoin);
auto write_stat = TemporaryFileStream::write(tmp_file->getPath(), header, std::move(pipeline), codec);
auto write_stat = TemporaryFileStreamLegacy::write(tmp_file->getPath(), header, std::move(pipeline), codec);
ProfileEvents::increment(ProfileEvents::ExternalProcessingCompressedBytesTotal, write_stat.compressed_bytes);
ProfileEvents::increment(ProfileEvents::ExternalProcessingUncompressedBytesTotal, write_stat.uncompressed_bytes);

View File

@ -0,0 +1,270 @@
#include <Interpreters/TemporaryDataOnDisk.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/ReadBufferFromFile.h>
#include <Compression/CompressedWriteBuffer.h>
#include <Compression/CompressedReadBuffer.h>
#include <Formats/NativeWriter.h>
#include <Formats/NativeReader.h>
#include <Core/ProtocolDefines.h>
#include <Common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int TOO_MANY_ROWS_OR_BYTES;
extern const int LOGICAL_ERROR;
extern const int NOT_ENOUGH_SPACE;
}
void TemporaryDataOnDiskScope::deltaAllocAndCheck(int compressed_delta, int uncompressed_delta)
{
if (parent)
parent->deltaAllocAndCheck(compressed_delta, uncompressed_delta);
/// check that we don't go negative
if ((compressed_delta < 0 && stat.compressed_size < static_cast<size_t>(-compressed_delta)) ||
(uncompressed_delta < 0 && stat.uncompressed_size < static_cast<size_t>(-uncompressed_delta)))
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Negative temporary data size");
}
size_t new_consumption = stat.compressed_size + compressed_delta;
if (compressed_delta > 0 && limit && new_consumption > limit)
throw Exception(ErrorCodes::TOO_MANY_ROWS_OR_BYTES, "Limit for temporary files size exceeded");
stat.compressed_size += compressed_delta;
stat.uncompressed_size += uncompressed_delta;
}
TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, CurrentMetrics::Value metric_scope, size_t max_file_size)
{
DiskPtr disk;
if (max_file_size > 0)
{
auto reservation = volume->reserve(max_file_size);
if (!reservation)
throw Exception("Not enough space on temporary disk", ErrorCodes::NOT_ENOUGH_SPACE);
disk = reservation->getDisk();
}
else
{
disk = volume->getDisk();
}
auto tmp_file = std::make_unique<TemporaryFileOnDisk>(disk, metric_scope);
std::lock_guard lock(mutex);
TemporaryFileStreamPtr & tmp_stream = streams.emplace_back(std::make_unique<TemporaryFileStream>(std::move(tmp_file), header, this));
return *tmp_stream;
}
std::vector<TemporaryFileStream *> TemporaryDataOnDisk::getStreams() const
{
std::vector<TemporaryFileStream *> res;
std::lock_guard lock(mutex);
res.reserve(streams.size());
for (const auto & stream : streams)
res.push_back(stream.get());
return res;
}
bool TemporaryDataOnDisk::empty() const
{
std::lock_guard lock(mutex);
return streams.empty();
}
struct TemporaryFileStream::OutputWriter
{
OutputWriter(const String & path, const Block & header_)
: out_file_buf(path)
, out_compressed_buf(out_file_buf)
, out_writer(out_compressed_buf, DBMS_TCP_PROTOCOL_VERSION, header_)
{
}
void write(const Block & block)
{
if (finalized)
throw Exception("Cannot write to finalized stream", ErrorCodes::LOGICAL_ERROR);
out_writer.write(block);
}
void finalize()
{
if (finalized)
return;
/// if we called finalize() explicitly, and got an exception,
/// we don't want to get it again in the destructor, so set finalized flag first
finalized = true;
out_writer.flush();
out_compressed_buf.finalize();
out_file_buf.finalize();
}
~OutputWriter()
{
try
{
finalize();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
WriteBufferFromFile out_file_buf;
CompressedWriteBuffer out_compressed_buf;
NativeWriter out_writer;
bool finalized = false;
};
struct TemporaryFileStream::InputReader
{
InputReader(const String & path, const Block & header_)
: in_file_buf(path)
, in_compressed_buf(in_file_buf)
, in_reader(in_compressed_buf, header_, DBMS_TCP_PROTOCOL_VERSION)
{
}
explicit InputReader(const String & path)
: in_file_buf(path)
, in_compressed_buf(in_file_buf)
, in_reader(in_compressed_buf, DBMS_TCP_PROTOCOL_VERSION)
{
}
Block read() { return in_reader.read(); }
ReadBufferFromFile in_file_buf;
CompressedReadBuffer in_compressed_buf;
NativeReader in_reader;
};
TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const Block & header_, TemporaryDataOnDisk * parent_)
: parent(parent_)
, header(header_)
, file(std::move(file_))
, out_writer(std::make_unique<OutputWriter>(file->path(), header))
{
}
void TemporaryFileStream::write(const Block & block)
{
if (!out_writer)
throw Exception("Writing has been finished", ErrorCodes::LOGICAL_ERROR);
updateAllocAndCheck();
out_writer->write(block);
}
TemporaryFileStream::Stat TemporaryFileStream::finishWriting()
{
if (out_writer)
{
out_writer->finalize();
/// The amount of written data can change after finalization, since some buffers can still be flushed.
/// Need to update the stat accordingly.
updateAllocAndCheck();
out_writer.reset();
/// The reader will be created at the first read call, so as not to consume memory before it is needed.
}
return stat;
}
bool TemporaryFileStream::isWriteFinished() const
{
assert(in_reader == nullptr || out_writer == nullptr);
return out_writer == nullptr;
}
Block TemporaryFileStream::read()
{
if (!isWriteFinished())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing has not been finished");
if (isFinalized())
return {};
if (!in_reader)
{
in_reader = std::make_unique<InputReader>(file->path(), header);
}
Block block = in_reader->read();
if (!block)
{
/// finalize earlier to release resources, do not wait for the destructor
this->finalize();
}
return block;
}
void TemporaryFileStream::updateAllocAndCheck()
{
assert(out_writer);
size_t new_compressed_size = out_writer->out_compressed_buf.getCompressedBytes();
size_t new_uncompressed_size = out_writer->out_compressed_buf.getUncompressedBytes();
if (unlikely(new_compressed_size < stat.compressed_size || new_uncompressed_size < stat.uncompressed_size))
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Temporary file {} size decreased after write: compressed: {} -> {}, uncompressed: {} -> {}",
file->path(), new_compressed_size, stat.compressed_size, new_uncompressed_size, stat.uncompressed_size);
}
parent->deltaAllocAndCheck(new_compressed_size - stat.compressed_size, new_uncompressed_size - stat.uncompressed_size);
stat.compressed_size = new_compressed_size;
stat.uncompressed_size = new_uncompressed_size;
}
bool TemporaryFileStream::isFinalized() const
{
return file == nullptr;
}
void TemporaryFileStream::finalize()
{
if (file)
{
file.reset();
parent->deltaAllocAndCheck(-stat.compressed_size, -stat.uncompressed_size);
}
if (in_reader)
in_reader.reset();
if (out_writer)
{
out_writer->finalize();
out_writer.reset();
}
}
TemporaryFileStream::~TemporaryFileStream()
{
try
{
finalize();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
assert(false); /// deltaAllocAndCheck with negative deltas can't throw an exception
}
}
}

View File

@ -0,0 +1,139 @@
#pragma once
#include <boost/noncopyable.hpp>
#include <Interpreters/Context.h>
#include <Disks/TemporaryFileOnDisk.h>
#include <Disks/IVolume.h>
namespace DB
{
class TemporaryDataOnDiskScope;
using TemporaryDataOnDiskScopePtr = std::shared_ptr<TemporaryDataOnDiskScope>;
class TemporaryDataOnDisk;
using TemporaryDataOnDiskPtr = std::unique_ptr<TemporaryDataOnDisk>;
class TemporaryFileStream;
using TemporaryFileStreamPtr = std::unique_ptr<TemporaryFileStream>;
/*
* Used to account for the amount of temporary data written to disk.
* If a limit is set, an exception is thrown when it is exceeded.
* Scopes can be nested, so a parent scope accounts for all data written by its children.
* Scopes are: global -> per-user -> per-query -> per-purpose (sorting, aggregation, etc).
*/
class TemporaryDataOnDiskScope : boost::noncopyable
{
public:
struct StatAtomic
{
std::atomic<size_t> compressed_size;
std::atomic<size_t> uncompressed_size;
};
explicit TemporaryDataOnDiskScope(VolumePtr volume_, size_t limit_)
: volume(std::move(volume_)), limit(limit_)
{}
explicit TemporaryDataOnDiskScope(TemporaryDataOnDiskScopePtr parent_, size_t limit_)
: parent(std::move(parent_)), volume(parent->volume), limit(limit_)
{}
/// TODO: remove
/// Refactor all code that uses volume directly to use TemporaryDataOnDisk.
VolumePtr getVolume() const { return volume; }
protected:
void deltaAllocAndCheck(int compressed_delta, int uncompressed_delta);
TemporaryDataOnDiskScopePtr parent = nullptr;
VolumePtr volume;
StatAtomic stat;
size_t limit = 0;
};
/*
* Holds the set of temporary files.
* A new file stream is created with `createStream`.
* Streams are owned by this object and will be deleted when it is deleted.
* It's a leaf node in the temporary data scope tree.
*/
class TemporaryDataOnDisk : private TemporaryDataOnDiskScope
{
friend class TemporaryFileStream; /// to allow it to call `deltaAllocAndCheck` to account data
public:
using TemporaryDataOnDiskScope::StatAtomic;
explicit TemporaryDataOnDisk(TemporaryDataOnDiskScopePtr parent_)
: TemporaryDataOnDiskScope(std::move(parent_), 0)
{}
/// If max_file_size > 0, check that there is enough space on the disk and throw an exception if there is not enough free space
TemporaryFileStream & createStream(const Block & header, CurrentMetrics::Value metric_scope, size_t max_file_size = 0);
std::vector<TemporaryFileStream *> getStreams() const;
bool empty() const;
const StatAtomic & getStat() const { return stat; }
private:
mutable std::mutex mutex;
std::vector<TemporaryFileStreamPtr> streams TSA_GUARDED_BY(mutex);
};
/*
* Data can be written into this stream and then read.
* After writing is finished, call `finishWriting` and then `read` to read the data back.
* The amount of data written to disk is accounted for in the parent scope.
*/
class TemporaryFileStream : boost::noncopyable
{
public:
struct Stat
{
/// Statistics for file
/// Non-atomic because we don't allow `read` or `write` on a single file from multiple threads
size_t compressed_size = 0;
size_t uncompressed_size = 0;
};
TemporaryFileStream(TemporaryFileOnDiskHolder file_, const Block & header_, TemporaryDataOnDisk * parent_);
void write(const Block & block);
Stat finishWriting();
bool isWriteFinished() const;
Block read();
const String & path() const { return file->getPath(); }
Block getHeader() const { return header; }
~TemporaryFileStream();
private:
void updateAllocAndCheck();
/// Finalize everything, close reader and writer, delete file
void finalize();
bool isFinalized() const;
TemporaryDataOnDisk * parent;
Block header;
TemporaryFileOnDiskHolder file;
Stat stat;
struct OutputWriter;
std::unique_ptr<OutputWriter> out_writer;
struct InputReader;
std::unique_ptr<InputReader> in_reader;
};
}

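A hedged end-to-end usage sketch of the new classes, following the pattern the Aggregator adopts in this commit: write blocks into a stream, call `finishWriting`, then read them back. The function name, the data and the metric are placeholders:

/// Illustrative usage sketch only.
void spillAndRestore(TemporaryDataOnDiskScopePtr scope, const Block & header, const Blocks & blocks)
{
    auto tmp_data = std::make_unique<TemporaryDataOnDisk>(scope);

    /// Creates a file on one of the volume's disks; a non-zero max_file_size would also reserve space up front.
    auto & stream = tmp_data->createStream(header, CurrentMetrics::TemporaryFilesForAggregation);

    for (const auto & block : blocks)
        stream.write(block);            /// every write updates the accounting in the parent scopes

    auto stat = stream.finishWriting(); /// flushes buffers; no more writes are allowed after this
    /// stat.compressed_size and stat.uncompressed_size now hold the on-disk footprint of this stream

    while (Block block = stream.read()) /// read back until an empty block is returned
    {
        /// ... process the restored block ...
    }
}
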
View File

@ -149,7 +149,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTFunction & node, const ASTPtr &, D
if (!func_arguments) return;
String func_name_lowercase = Poco::toLower(node.name);
if (func_name_lowercase == "count" &&
if ((func_name_lowercase == "count" || func_name_lowercase == "countstate") &&
func_arguments->children.size() == 1 &&
func_arguments->children[0]->as<ASTAsterisk>())
func_arguments->children.clear();

View File

@ -33,14 +33,20 @@ void ASTSelectWithUnionQuery::formatQueryImpl(const FormatSettings & settings, F
auto mode_to_str = [&](auto mode)
{
if (mode == SelectUnionMode::UNION_ALL)
if (mode == SelectUnionMode::UNION_DEFAULT)
return "UNION";
else if (mode == SelectUnionMode::UNION_ALL)
return "UNION ALL";
else if (mode == SelectUnionMode::UNION_DISTINCT)
return "UNION DISTINCT";
else if (mode == SelectUnionMode::EXCEPT_DEFAULT)
return "EXCEPT";
else if (mode == SelectUnionMode::EXCEPT_ALL)
return "EXCEPT ALL";
else if (mode == SelectUnionMode::EXCEPT_DISTINCT)
return "EXCEPT DISTINCT";
else if (mode == SelectUnionMode::INTERSECT_DEFAULT)
return "INTERSECT";
else if (mode == SelectUnionMode::INTERSECT_ALL)
return "INTERSECT ALL";
else if (mode == SelectUnionMode::INTERSECT_DISTINCT)

View File

@ -24,13 +24,28 @@ void ASTSetQuery::formatImpl(const FormatSettings & format, FormatState &, Forma
if (is_standalone)
format.ostr << (format.hilite ? hilite_keyword : "") << "SET " << (format.hilite ? hilite_none : "");
for (auto it = changes.begin(); it != changes.end(); ++it)
{
if (it != changes.begin())
format.ostr << ", ";
bool first = true;
formatSettingName(it->name, format.ostr);
format.ostr << " = " << applyVisitor(FieldVisitorToString(), it->value);
for (const auto & change : changes)
{
if (!first)
format.ostr << ", ";
else
first = false;
formatSettingName(change.name, format.ostr);
format.ostr << " = " << applyVisitor(FieldVisitorToString(), change.value);
}
for (const auto & [name, value] : query_parameters)
{
if (!first)
format.ostr << ", ";
else
first = false;
formatSettingName(QUERY_PARAMETER_NAME_PREFIX + name, format.ostr);
format.ostr << " = " << value;
}
}

View File

@ -7,6 +7,8 @@
namespace DB
{
constexpr char QUERY_PARAMETER_NAME_PREFIX[] = "param_";
/** SET query
*/
class ASTSetQuery : public IAST

View File

@ -9,8 +9,6 @@ namespace DB
struct SettingChange;
constexpr char QUERY_PARAMETER_NAME_PREFIX[] = "param_";
/** Query like this:
* SET name1 = value1, name2 = value2, ...
*/

View File

@ -314,13 +314,16 @@ void registerInputFormatCSV(FormatFactory & factory)
registerWithNamesAndTypes("CSV", register_func);
}
std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows)
std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows)
{
char * pos = in.position();
bool quotes = false;
bool need_more_data = true;
size_t number_of_rows = 0;
if (max_rows && (max_rows < min_rows))
max_rows = min_rows;
while (loadAtPosition(in, memory, pos) && need_more_data)
{
if (quotes)
@ -346,30 +349,30 @@ std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memor
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue;
else if (*pos == '"')
if (*pos == '"')
{
quotes = true;
++pos;
continue;
}
else if (*pos == '\n')
++number_of_rows;
if ((number_of_rows >= min_rows)
&& ((memory.size() + static_cast<size_t>(pos - in.position()) >= min_bytes) || (number_of_rows == max_rows)))
need_more_data = false;
if (*pos == '\n')
{
++number_of_rows;
if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size && number_of_rows >= min_rows)
need_more_data = false;
++pos;
if (loadAtPosition(in, memory, pos) && *pos == '\r')
++pos;
}
else if (*pos == '\r')
{
if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size && number_of_rows >= min_rows)
need_more_data = false;
++pos;
if (loadAtPosition(in, memory, pos) && *pos == '\n')
{
++pos;
++number_of_rows;
}
}
}
}
@ -383,9 +386,9 @@ void registerFileSegmentationEngineCSV(FormatFactory & factory)
auto register_func = [&](const String & format_name, bool with_names, bool with_types)
{
size_t min_rows = 1 + int(with_names) + int(with_types);
factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
{
return fileSegmentationEngineCSVImpl(in, memory, min_chunk_size, min_rows);
return fileSegmentationEngineCSVImpl(in, memory, min_bytes, min_rows, max_rows);
});
};

View File

@ -83,6 +83,6 @@ private:
CSVFormatReader reader;
};
std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows);
std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows);
}

View File

@ -66,8 +66,8 @@ void registerFileSegmentationEngineHiveText(FormatFactory & factory)
{
factory.registerFileSegmentationEngine(
"HiveText",
[](ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) -> std::pair<bool, size_t> {
return fileSegmentationEngineCSVImpl(in, memory, min_chunk_size, 0);
[](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) -> std::pair<bool, size_t> {
return fileSegmentationEngineCSVImpl(in, memory, min_bytes, 0, max_rows);
});
}

View File

@ -265,9 +265,9 @@ void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory)
/// the minimum number of rows for the segmentation engine according to
/// the parameters with_names and with_types.
size_t min_rows = 1 + int(with_names) + int(with_types);
factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
{
return JSONUtils::fileSegmentationEngineJSONCompactEachRow(in, memory, min_chunk_size, min_rows);
return JSONUtils::fileSegmentationEngineJSONCompactEachRow(in, memory, min_bytes, min_rows, max_rows);
});
};

View File

@ -33,7 +33,7 @@ void ParallelParsingInputFormat::segmentatorThreadFunction(ThreadGroupStatusPtr
// Segmenting the original input.
unit.segment.resize(0);
auto [have_more_data, currently_read_rows] = file_segmentation_engine(*in, unit.segment, min_chunk_bytes);
auto [have_more_data, currently_read_rows] = file_segmentation_engine(*in, unit.segment, min_chunk_bytes, max_block_size);
unit.offset = successfully_read_rows_count;
successfully_read_rows_count += currently_read_rows;

View File

@ -82,6 +82,7 @@ public:
String format_name;
size_t max_threads;
size_t min_chunk_bytes;
size_t max_block_size;
bool is_server;
};
@ -91,6 +92,7 @@ public:
, file_segmentation_engine(params.file_segmentation_engine)
, format_name(params.format_name)
, min_chunk_bytes(params.min_chunk_bytes)
, max_block_size(params.max_block_size)
, is_server(params.is_server)
, pool(params.max_threads)
{
@ -188,6 +190,7 @@ private:
FormatFactory::FileSegmentationEngine file_segmentation_engine;
const String format_name;
const size_t min_chunk_bytes;
const size_t max_block_size;
BlockMissingValues last_block_missing_values;

View File

@ -174,7 +174,7 @@ void registerInputFormatRegexp(FormatFactory & factory)
});
}
static std::pair<bool, size_t> fileSegmentationEngineRegexpImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
static std::pair<bool, size_t> fileSegmentationEngineRegexpImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
{
char * pos = in.position();
bool need_more_data = true;
@ -182,17 +182,28 @@ static std::pair<bool, size_t> fileSegmentationEngineRegexpImpl(ReadBuffer & in,
while (loadAtPosition(in, memory, pos) && need_more_data)
{
pos = find_first_symbols<'\n'>(pos, in.buffer().end());
pos = find_first_symbols<'\r', '\n'>(pos, in.buffer().end());
if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue;
if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size)
++number_of_rows;
if ((memory.size() + static_cast<size_t>(pos - in.position()) >= min_bytes) || (number_of_rows == max_rows))
need_more_data = false;
++pos;
++number_of_rows;
if (*pos == '\n')
{
++pos;
if (loadAtPosition(in, memory, pos) && *pos == '\r')
++pos;
}
else if (*pos == '\r')
{
++pos;
if (loadAtPosition(in, memory, pos) && *pos == '\n')
++pos;
}
}
saveUpToPosition(in, memory, pos);
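The loop above (and the TabSeparated variant below) now accepts '\n', '\r', '\r\n' and '\n\r' as a single row terminator. The repeated branch amounts to the following helper, shown here only to spell the logic out (not part of the patch):

    /// Illustrative only: advance past one line break, swallowing a paired CR/LF or LF/CR if present.
    /// Assumes *pos is '\n' or '\r' on entry; loadAtPosition refills the buffer exactly as in the engines above.
    static void skipLineBreak(ReadBuffer & in, DB::Memory<> & memory, char * & pos)
    {
        char first = *pos;
        ++pos;
        if (loadAtPosition(in, memory, pos) && ((first == '\n' && *pos == '\r') || (first == '\r' && *pos == '\n')))
            ++pos;
    }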

View File

@ -320,12 +320,15 @@ void registerTSVSchemaReader(FormatFactory & factory)
}
}
static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, bool is_raw, size_t min_rows)
static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer & in, DB::Memory<> & memory, bool is_raw, size_t min_bytes, size_t min_rows, size_t max_rows)
{
bool need_more_data = true;
char * pos = in.position();
size_t number_of_rows = 0;
if (max_rows && (max_rows < min_rows))
max_rows = min_rows;
while (loadAtPosition(in, memory, pos) && need_more_data)
{
if (is_raw)
@ -335,8 +338,7 @@ static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer
if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
if (pos == in.buffer().end())
else if (pos == in.buffer().end())
continue;
if (!is_raw && *pos == '\\')
@ -344,15 +346,25 @@ static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer
++pos;
if (loadAtPosition(in, memory, pos))
++pos;
continue;
}
else if (*pos == '\n' || *pos == '\r')
{
if (*pos == '\n')
++number_of_rows;
if ((memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size) && number_of_rows >= min_rows)
need_more_data = false;
++number_of_rows;
if ((number_of_rows >= min_rows)
&& ((memory.size() + static_cast<size_t>(pos - in.position()) >= min_bytes) || (number_of_rows == max_rows)))
need_more_data = false;
if (*pos == '\n')
{
++pos;
if (loadAtPosition(in, memory, pos) && *pos == '\r')
++pos;
}
else if (*pos == '\r')
{
++pos;
if (loadAtPosition(in, memory, pos) && *pos == '\n')
++pos;
}
}
@ -368,9 +380,9 @@ void registerFileSegmentationEngineTabSeparated(FormatFactory & factory)
auto register_func = [&](const String & format_name, bool with_names, bool with_types)
{
size_t min_rows = 1 + static_cast<int>(with_names) + static_cast<int>(with_types);
factory.registerFileSegmentationEngine(format_name, [is_raw, min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
factory.registerFileSegmentationEngine(format_name, [is_raw, min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
{
return fileSegmentationEngineTabSeparatedImpl(in, memory, min_chunk_size, is_raw, min_rows);
return fileSegmentationEngineTabSeparatedImpl(in, memory, is_raw, min_bytes, min_rows, max_rows);
});
};
@ -381,12 +393,9 @@ void registerFileSegmentationEngineTabSeparated(FormatFactory & factory)
}
// We can use the same segmentation engine for TSKV.
factory.registerFileSegmentationEngine("TSKV", [](
ReadBuffer & in,
DB::Memory<> & memory,
size_t min_chunk_size)
factory.registerFileSegmentationEngine("TSKV", [](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
{
return fileSegmentationEngineTabSeparatedImpl(in, memory, min_chunk_size, false, 1);
return fileSegmentationEngineTabSeparatedImpl(in, memory, false, min_bytes, 1, max_rows);
});
}

View File

@ -5,6 +5,7 @@
#include <IO/ReadHelpers.h>
#include <IO/Operators.h>
namespace DB
{

View File

@ -177,7 +177,7 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B
transform_params->params.group_by_two_level_threshold_bytes,
transform_params->params.max_bytes_before_external_group_by,
transform_params->params.empty_result_for_aggregation_by_empty_set,
transform_params->params.tmp_volume,
transform_params->params.tmp_data_scope,
transform_params->params.max_threads,
transform_params->params.min_free_disk_space,
transform_params->params.compile_aggregate_expressions,

View File

@ -12,6 +12,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
static ITransformingStep::Traits getTraits(size_t limit)
{
return ITransformingStep::Traits
@ -37,7 +42,7 @@ SortingStep::SortingStep(
size_t max_bytes_before_remerge_,
double remerge_lowered_memory_bytes_ratio_,
size_t max_bytes_before_external_sort_,
VolumePtr tmp_volume_,
TemporaryDataOnDiskScopePtr tmp_data_,
size_t min_free_disk_space_,
bool optimize_sorting_by_input_stream_properties_)
: ITransformingStep(input_stream, input_stream.header, getTraits(limit_))
@ -49,10 +54,13 @@ SortingStep::SortingStep(
, max_bytes_before_remerge(max_bytes_before_remerge_)
, remerge_lowered_memory_bytes_ratio(remerge_lowered_memory_bytes_ratio_)
, max_bytes_before_external_sort(max_bytes_before_external_sort_)
, tmp_volume(tmp_volume_)
, tmp_data(tmp_data_)
, min_free_disk_space(min_free_disk_space_)
, optimize_sorting_by_input_stream_properties(optimize_sorting_by_input_stream_properties_)
{
if (max_bytes_before_external_sort && tmp_data == nullptr)
throw Exception("Temporary data storage for external sorting is not provided", ErrorCodes::LOGICAL_ERROR);
/// TODO: check input_stream is partially sorted by the same description.
output_stream->sort_description = result_description;
output_stream->sort_scope = DataStream::SortScope::Global;
@ -189,7 +197,7 @@ void SortingStep::mergeSorting(QueryPipelineBuilder & pipeline, const SortDescri
max_bytes_before_remerge / pipeline.getNumStreams(),
remerge_lowered_memory_bytes_ratio,
max_bytes_before_external_sort,
tmp_volume,
std::make_unique<TemporaryDataOnDisk>(tmp_data),
min_free_disk_space);
});
}

View File

@ -2,7 +2,7 @@
#include <Processors/QueryPlan/ITransformingStep.h>
#include <Core/SortDescription.h>
#include <QueryPipeline/SizeLimits.h>
#include <Disks/IVolume.h>
#include <Interpreters/TemporaryDataOnDisk.h>
namespace DB
{
@ -21,7 +21,7 @@ public:
size_t max_bytes_before_remerge_,
double remerge_lowered_memory_bytes_ratio_,
size_t max_bytes_before_external_sort_,
VolumePtr tmp_volume_,
TemporaryDataOnDiskScopePtr tmp_data_,
size_t min_free_disk_space_,
bool optimize_sorting_by_input_stream_properties_);
@ -85,7 +85,8 @@ private:
size_t max_bytes_before_remerge = 0;
double remerge_lowered_memory_bytes_ratio = 0;
size_t max_bytes_before_external_sort = 0;
VolumePtr tmp_volume;
TemporaryDataOnDiskScopePtr tmp_data = nullptr;
size_t min_free_disk_space = 0;
const bool optimize_sorting_by_input_stream_properties = false;
};
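For context, the tmp_volume -> tmp_data switch replaces a raw disk volume with the TemporaryDataOnDisk hierarchy: a shared scope (obtained from the context) carries the overall accounting, each operator wraps it in its own TemporaryDataOnDisk, and individual spill files are created as streams. A rough sketch of that flow, using only calls visible elsewhere in this patch (header and reserve_size stand in for the operator's own values):

    /// Illustrative flow only; mirrors the calls used by SortingStep and MergeSortingTransform in this patch.
    TemporaryDataOnDiskScopePtr scope = context->getTempDataOnDisk();   /// shared scope with global accounting
    auto tmp_data = std::make_unique<TemporaryDataOnDisk>(scope);       /// per-operator bookkeeping
    /// If there's less free disk space than reserve_size, an exception will be thrown.
    auto & tmp_stream = tmp_data->createStream(header, CurrentMetrics::TemporaryFilesForSort, reserve_size);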

View File

@ -1,5 +1,5 @@
#include <Processors/Sources/TemporaryFileLazySource.h>
#include <Formats/TemporaryFileStream.h>
#include <Formats/TemporaryFileStreamLegacy.h>
namespace DB
{
@ -18,7 +18,7 @@ Chunk TemporaryFileLazySource::generate()
return {};
if (!stream)
stream = std::make_unique<TemporaryFileStream>(path, header);
stream = std::make_unique<TemporaryFileStreamLegacy>(path, header);
auto block = stream->block_in->read();
if (!block)

View File

@ -5,7 +5,7 @@
namespace DB
{
struct TemporaryFileStream;
struct TemporaryFileStreamLegacy;
class TemporaryFileLazySource : public ISource
{
@ -22,7 +22,7 @@ private:
Block header;
bool done;
std::unique_ptr<TemporaryFileStream> stream;
std::unique_ptr<TemporaryFileStreamLegacy> stream;
};
}

View File

@ -34,7 +34,7 @@ TTLAggregationAlgorithm::TTLAggregationAlgorithm(
0,
settings.max_bytes_before_external_group_by,
settings.empty_result_for_aggregation_by_empty_set,
storage_.getContext()->getTemporaryVolume(),
storage_.getContext()->getTempDataOnDisk(),
settings.max_threads,
settings.min_free_disk_space_for_temporary_data,
settings.compile_aggregate_expressions,

View File

@ -53,33 +53,29 @@ namespace
class SourceFromNativeStream : public ISource
{
public:
SourceFromNativeStream(const Block & header, const std::string & path)
: ISource(header), file_in(path), compressed_in(file_in),
block_in(std::make_unique<NativeReader>(compressed_in, DBMS_TCP_PROTOCOL_VERSION))
{
}
explicit SourceFromNativeStream(TemporaryFileStream * tmp_stream_)
: ISource(tmp_stream_->getHeader())
, tmp_stream(tmp_stream_)
{}
String getName() const override { return "SourceFromNativeStream"; }
Chunk generate() override
{
if (!block_in)
if (!tmp_stream)
return {};
auto block = block_in->read();
auto block = tmp_stream->read();
if (!block)
{
block_in.reset();
tmp_stream = nullptr;
return {};
}
return convertToChunk(block);
}
private:
ReadBufferFromFile file_in;
CompressedReadBuffer compressed_in;
std::unique_ptr<NativeReader> block_in;
TemporaryFileStream * tmp_stream;
};
}
@ -564,7 +560,7 @@ void AggregatingTransform::initGenerate()
elapsed_seconds, src_rows / elapsed_seconds,
ReadableSize(src_bytes / elapsed_seconds));
if (params->aggregator.hasTemporaryFiles())
if (params->aggregator.hasTemporaryData())
{
if (variants.isConvertibleToTwoLevel())
variants.convertToTwoLevel();
@ -577,7 +573,7 @@ void AggregatingTransform::initGenerate()
if (many_data->num_finished.fetch_add(1) + 1 < many_data->variants.size())
return;
if (!params->aggregator.hasTemporaryFiles())
if (!params->aggregator.hasTemporaryData())
{
auto prepared_data = params->aggregator.prepareVariantsToMerge(many_data->variants);
auto prepared_data_ptr = std::make_shared<ManyAggregatedDataVariants>(std::move(prepared_data));
@ -604,25 +600,27 @@ void AggregatingTransform::initGenerate()
}
}
const auto & files = params->aggregator.getTemporaryFiles();
Pipe pipe;
const auto & tmp_data = params->aggregator.getTemporaryData();
Pipe pipe;
{
auto header = params->aggregator.getHeader(false);
Pipes pipes;
for (const auto & file : files.files)
pipes.emplace_back(Pipe(std::make_unique<SourceFromNativeStream>(header, file->path())));
for (auto * tmp_stream : tmp_data.getStreams())
pipes.emplace_back(Pipe(std::make_unique<SourceFromNativeStream>(tmp_stream)));
pipe = Pipe::unitePipes(std::move(pipes));
}
size_t num_streams = tmp_data.getStreams().size();
size_t compressed_size = tmp_data.getStat().compressed_size;
size_t uncompressed_size = tmp_data.getStat().uncompressed_size;
LOG_DEBUG(
log,
"Will merge {} temporary files of size {} compressed, {} uncompressed.",
files.files.size(),
ReadableSize(files.sum_size_compressed),
ReadableSize(files.sum_size_uncompressed));
num_streams,
ReadableSize(compressed_size),
ReadableSize(uncompressed_size));
addMergingAggregatedMemoryEfficientTransform(pipe, params, temporary_data_merge_threads);

View File

@ -30,21 +30,15 @@ namespace CurrentMetrics
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_ENOUGH_SPACE;
}
class BufferingToFileTransform : public IAccumulatingTransform
{
public:
BufferingToFileTransform(const Block & header, Poco::Logger * log_, std::string path_)
: IAccumulatingTransform(header, header), log(log_)
, path(std::move(path_)), file_buf_out(path), compressed_buf_out(file_buf_out)
, out_stream(std::make_unique<NativeWriter>(compressed_buf_out, 0, header))
BufferingToFileTransform(const Block & header, TemporaryFileStream & tmp_stream_, Poco::Logger * log_)
: IAccumulatingTransform(header, header)
, tmp_stream(tmp_stream_)
, log(log_)
{
LOG_INFO(log, "Sorting and writing part of data into temporary file {}", path);
LOG_INFO(log, "Sorting and writing part of data into temporary file {}", tmp_stream.path());
ProfileEvents::increment(ProfileEvents::ExternalSortWritePart);
}
@ -52,71 +46,37 @@ public:
void consume(Chunk chunk) override
{
out_stream->write(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()));
Block block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns());
tmp_stream.write(block);
}
Chunk generate() override
{
if (out_stream)
if (!tmp_stream.isWriteFinished())
{
out_stream->flush();
compressed_buf_out.next();
file_buf_out.next();
auto stat = tmp_stream.finishWriting();
auto stat = updateWriteStat();
ProfileEvents::increment(ProfileEvents::ExternalProcessingCompressedBytesTotal, stat.compressed_size);
ProfileEvents::increment(ProfileEvents::ExternalProcessingUncompressedBytesTotal, stat.uncompressed_size);
ProfileEvents::increment(ProfileEvents::ExternalSortCompressedBytes, stat.compressed_size);
ProfileEvents::increment(ProfileEvents::ExternalSortUncompressedBytes, stat.uncompressed_size);
LOG_INFO(log, "Done writing part of data into temporary file {}, compressed {}, uncompressed {} ",
path, ReadableSize(static_cast<double>(stat.compressed_size)), ReadableSize(static_cast<double>(stat.uncompressed_size)));
out_stream.reset();
file_in = std::make_unique<ReadBufferFromFile>(path);
compressed_in = std::make_unique<CompressedReadBuffer>(*file_in);
block_in = std::make_unique<NativeReader>(*compressed_in, getOutputPort().getHeader(), 0);
tmp_stream.path(), ReadableSize(static_cast<double>(stat.compressed_size)), ReadableSize(static_cast<double>(stat.uncompressed_size)));
}
if (!block_in)
return {};
auto block = block_in->read();
Block block = tmp_stream.read();
if (!block)
{
block_in.reset();
return {};
}
UInt64 num_rows = block.rows();
return Chunk(block.getColumns(), num_rows);
}
private:
struct Stat
{
size_t compressed_size = 0;
size_t uncompressed_size = 0;
};
Stat updateWriteStat()
{
Stat res{compressed_buf_out.getCompressedBytes(), compressed_buf_out.getUncompressedBytes()};
ProfileEvents::increment(ProfileEvents::ExternalProcessingCompressedBytesTotal, res.compressed_size);
ProfileEvents::increment(ProfileEvents::ExternalProcessingUncompressedBytesTotal, res.uncompressed_size);
ProfileEvents::increment(ProfileEvents::ExternalSortCompressedBytes, res.compressed_size);
ProfileEvents::increment(ProfileEvents::ExternalSortUncompressedBytes, res.uncompressed_size);
return res;
}
TemporaryFileStream & tmp_stream;
Poco::Logger * log;
std::string path;
WriteBufferFromFile file_buf_out;
CompressedWriteBuffer compressed_buf_out;
std::unique_ptr<NativeWriter> out_stream;
std::unique_ptr<ReadBufferFromFile> file_in;
std::unique_ptr<CompressedReadBuffer> compressed_in;
std::unique_ptr<NativeReader> block_in;
};
MergeSortingTransform::MergeSortingTransform(
@ -128,13 +88,13 @@ MergeSortingTransform::MergeSortingTransform(
size_t max_bytes_before_remerge_,
double remerge_lowered_memory_bytes_ratio_,
size_t max_bytes_before_external_sort_,
VolumePtr tmp_volume_,
TemporaryDataOnDiskPtr tmp_data_,
size_t min_free_disk_space_)
: SortingTransform(header, description_, max_merged_block_size_, limit_, increase_sort_description_compile_attempts)
, max_bytes_before_remerge(max_bytes_before_remerge_)
, remerge_lowered_memory_bytes_ratio(remerge_lowered_memory_bytes_ratio_)
, max_bytes_before_external_sort(max_bytes_before_external_sort_)
, tmp_volume(tmp_volume_)
, tmp_data(std::move(tmp_data_))
, min_free_disk_space(min_free_disk_space_)
{
}
@ -209,17 +169,12 @@ void MergeSortingTransform::consume(Chunk chunk)
*/
if (max_bytes_before_external_sort && sum_bytes_in_blocks > max_bytes_before_external_sort)
{
size_t size = sum_bytes_in_blocks + min_free_disk_space;
auto reservation = tmp_volume->reserve(size);
if (!reservation)
throw Exception("Not enough space for external sort in temporary storage", ErrorCodes::NOT_ENOUGH_SPACE);
/// If there's less free disk space than reserve_size, an exception will be thrown
size_t reserve_size = sum_bytes_in_blocks + min_free_disk_space;
auto & tmp_stream = tmp_data->createStream(header_without_constants, CurrentMetrics::TemporaryFilesForSort, reserve_size);
temporary_files.emplace_back(std::make_unique<TemporaryFileOnDisk>(reservation->getDisk(), CurrentMetrics::TemporaryFilesForSort));
const std::string & path = temporary_files.back()->path();
merge_sorter
= std::make_unique<MergeSorter>(header_without_constants, std::move(chunks), description, max_merged_block_size, limit);
auto current_processor = std::make_shared<BufferingToFileTransform>(header_without_constants, log, path);
merge_sorter = std::make_unique<MergeSorter>(header_without_constants, std::move(chunks), description, max_merged_block_size, limit);
auto current_processor = std::make_shared<BufferingToFileTransform>(header_without_constants, tmp_stream, log);
processors.emplace_back(current_processor);
@ -261,13 +216,14 @@ void MergeSortingTransform::generate()
{
if (!generated_prefix)
{
if (temporary_files.empty())
size_t num_tmp_files = tmp_data ? tmp_data->getStreams().size() : 0;
if (num_tmp_files == 0)
merge_sorter
= std::make_unique<MergeSorter>(header_without_constants, std::move(chunks), description, max_merged_block_size, limit);
else
{
ProfileEvents::increment(ProfileEvents::ExternalSortMerge);
LOG_INFO(log, "There are {} temporary sorted parts to merge", temporary_files.size());
LOG_INFO(log, "There are {} temporary sorted parts to merge", num_tmp_files);
processors.emplace_back(std::make_shared<MergeSorterSource>(
header_without_constants, std::move(chunks), description, max_merged_block_size, limit));
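The transform's spill path thus reduces to the TemporaryFileStream API: write blocks, finish writing (which yields the compressed/uncompressed sizes used for the profile events above), then read the sorted blocks back for the final merge. A condensed sketch of that cycle, assuming a stream obtained as in the consume() path above:

    /// Illustrative only: the write/read cycle BufferingToFileTransform performs on a TemporaryFileStream.
    tmp_stream.write(block);                     /// spill sorted blocks while consuming input
    auto stat = tmp_stream.finishWriting();      /// flush; stat.compressed_size / stat.uncompressed_size
    while (Block read_block = tmp_stream.read()) /// stream the data back
        process(read_block);                     /// hypothetical consumer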

View File

@ -4,6 +4,7 @@
#include <Core/SortDescription.h>
#include <Common/filesystemHelpers.h>
#include <Disks/TemporaryFileOnDisk.h>
#include <Interpreters/TemporaryDataOnDisk.h>
#include <Common/logger_useful.h>
@ -28,7 +29,7 @@ public:
size_t max_bytes_before_remerge_,
double remerge_lowered_memory_bytes_ratio_,
size_t max_bytes_before_external_sort_,
VolumePtr tmp_volume_,
TemporaryDataOnDiskPtr tmp_data_,
size_t min_free_disk_space_);
String getName() const override { return "MergeSortingTransform"; }
@ -44,7 +45,7 @@ private:
size_t max_bytes_before_remerge;
double remerge_lowered_memory_bytes_ratio;
size_t max_bytes_before_external_sort;
VolumePtr tmp_volume;
TemporaryDataOnDiskPtr tmp_data;
size_t min_free_disk_space;
size_t sum_rows_in_blocks = 0;
@ -55,9 +56,6 @@ private:
/// If remerge doesn't save memory at least several times, mark it as useless and don't do it anymore.
bool remerge_is_useful = true;
/// Everything below is for external sorting.
std::vector<TemporaryFileOnDiskHolder> temporary_files;
/// Merge all accumulated blocks to keep no more than limit rows.
void remerge();

View File

@ -26,7 +26,7 @@
#include <Common/StringUtils/StringUtils.h>
#include <Common/setThreadName.h>
#include <Common/typeid_cast.h>
#include <Parsers/ParserSetQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <base/getFQDNOrHostName.h>
#include <base/scope_guard.h>

View File

@ -4917,24 +4917,33 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(
if (move_ttl_entry)
{
LOG_TRACE(log, "Got move TTL entry, will try to reserver destination for move");
SpacePtr destination_ptr = getDestinationForMoveTTL(*move_ttl_entry, is_insert);
SpacePtr destination_ptr = getDestinationForMoveTTL(*move_ttl_entry);
bool perform_ttl_move_on_insert = is_insert && destination_ptr && shouldPerformTTLMoveOnInsert(destination_ptr);
if (!destination_ptr)
{
if (move_ttl_entry->destination_type == DataDestinationType::VOLUME && !move_ttl_entry->if_exists)
LOG_WARNING(
log,
"Would like to reserve space on volume '{}' by TTL rule of table '{}' but volume was not found or rule is not "
"applicable at the moment",
"Would like to reserve space on volume '{}' by TTL rule of table '{}' but volume was not found",
move_ttl_entry->destination_name,
*std::atomic_load(&log_name));
else if (move_ttl_entry->destination_type == DataDestinationType::DISK && !move_ttl_entry->if_exists)
LOG_WARNING(
log,
"Would like to reserve space on disk '{}' by TTL rule of table '{}' but disk was not found or rule is not applicable "
"at the moment",
"Would like to reserve space on disk '{}' by TTL rule of table '{}' but disk was not found",
move_ttl_entry->destination_name,
*std::atomic_load(&log_name));
}
else if (is_insert && !perform_ttl_move_on_insert)
{
LOG_TRACE(
log,
"TTL move on insert to {} {} for table {} is disabled",
(move_ttl_entry->destination_type == DataDestinationType::VOLUME ? "volume" : "disk"),
move_ttl_entry->destination_name,
*std::atomic_load(&log_name));
}
else
{
LOG_TRACE(log, "Reserving bytes on selected destination");
@ -4978,41 +4987,33 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(
return reservation;
}
SpacePtr MergeTreeData::getDestinationForMoveTTL(const TTLDescription & move_ttl, bool is_insert) const
SpacePtr MergeTreeData::getDestinationForMoveTTL(const TTLDescription & move_ttl) const
{
auto policy = getStoragePolicy();
if (move_ttl.destination_type == DataDestinationType::VOLUME)
{
auto volume = policy->tryGetVolumeByName(move_ttl.destination_name);
if (!volume)
return {};
if (is_insert && !volume->perform_ttl_move_on_insert)
return {};
return volume;
}
return policy->tryGetVolumeByName(move_ttl.destination_name);
else if (move_ttl.destination_type == DataDestinationType::DISK)
{
auto disk = policy->tryGetDiskByName(move_ttl.destination_name);
if (!disk)
return {};
auto volume = policy->getVolume(policy->getVolumeIndexByDisk(disk));
if (!volume)
return {};
if (is_insert && !volume->perform_ttl_move_on_insert)
return {};
return disk;
}
return policy->tryGetDiskByName(move_ttl.destination_name);
else
return {};
}
bool MergeTreeData::shouldPerformTTLMoveOnInsert(const SpacePtr & move_destination) const
{
if (move_destination->isVolume())
{
auto volume = std::static_pointer_cast<IVolume>(move_destination);
return volume->perform_ttl_move_on_insert;
}
if (move_destination->isDisk())
{
auto disk = std::static_pointer_cast<IDisk>(move_destination);
if (auto volume = getStoragePolicy()->tryGetVolumeByDisk(disk))
return volume->perform_ttl_move_on_insert;
}
return false;
}
bool MergeTreeData::isPartInTTLDestination(const TTLDescription & ttl, const IMergeTreeDataPart & part) const
{
auto policy = getStoragePolicy();

View File

@ -867,9 +867,12 @@ public:
/// Return alter conversions for part which must be applied on fly.
AlterConversions getAlterConversionsForPart(MergeTreeDataPartPtr part) const;
/// Returns destination disk or volume for the TTL rule according to current storage policy
/// 'is_insert' - is TTL move performed on new data part insert.
SpacePtr getDestinationForMoveTTL(const TTLDescription & move_ttl, bool is_insert = false) const;
/// Returns destination disk or volume for the TTL rule according to current storage policy.
SpacePtr getDestinationForMoveTTL(const TTLDescription & move_ttl) const;
/// Whether INSERT of a data part which is already expired should move it immediately to a volume/disk declared in move rule.
bool shouldPerformTTLMoveOnInsert(const SpacePtr & move_destination) const;
/// Checks if the given part already belongs to the destination disk or volume for the
/// TTL rule.

View File

@ -310,7 +310,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read(
settings.group_by_two_level_threshold_bytes,
settings.max_bytes_before_external_group_by,
settings.empty_result_for_aggregation_by_empty_set,
context->getTemporaryVolume(),
context->getTempDataOnDisk(),
settings.max_threads,
settings.min_free_disk_space_for_temporary_data,
settings.compile_aggregate_expressions,

View File

@ -103,10 +103,17 @@ NameSet getFixedSortingColumns(
return fixed_points;
}
struct MatchResult
{
/// One of {-1, 0, 1} - direction of the match. 0 means it doesn't match.
int direction = 0;
/// If true then the current key must be the last in the matched prefix of the sort description.
bool is_last_key = false;
};
/// Optimize in case of exact match with order key element
/// or in some simple cases when order key element is wrapped into monotonic function.
/// Returns on of {-1, 0, 1} - direction of the match. 0 means - doesn't match.
int matchSortDescriptionAndKey(
MatchResult matchSortDescriptionAndKey(
const ExpressionActions::Actions & actions,
const SortColumnDescription & sort_column,
const String & sorting_key_column)
@ -114,12 +121,13 @@ int matchSortDescriptionAndKey(
/// If required order depend on collation, it cannot be matched with primary key order.
/// Because primary keys cannot have collations.
if (sort_column.collator)
return 0;
return {};
MatchResult result{sort_column.direction, false};
int current_direction = sort_column.direction;
/// For the path: order by (sort_column, ...)
if (sort_column.column_name == sorting_key_column)
return current_direction;
return result;
/// For the path: order by (function(sort_column), ...)
/// Allow only one simple monotonic functions with one argument
@ -132,44 +140,35 @@ int matchSortDescriptionAndKey(
continue;
if (found_function)
{
current_direction = 0;
break;
}
else
{
found_function = true;
}
return {};
found_function = true;
if (action.node->children.size() != 1 || action.node->children.at(0)->result_name != sorting_key_column)
{
current_direction = 0;
break;
}
return {};
const auto & func = *action.node->function_base;
if (!func.hasInformationAboutMonotonicity())
{
current_direction = 0;
break;
}
return {};
auto monotonicity = func.getMonotonicityForRange(*func.getArgumentTypes().at(0), {}, {});
if (!monotonicity.is_monotonic)
{
current_direction = 0;
break;
}
else if (!monotonicity.is_positive)
{
current_direction *= -1;
}
return {};
/// If the function is not strictly monotonic, it can break the order
/// if it's not last in the prefix of sort description.
/// E.g. if we have ORDER BY (d, u) -- ('2020-01-01', 1), ('2020-01-02', 0), ('2020-01-03', 1)
/// ORDER BY (toStartOfMonth(d), u) -- ('2020-01-01', 1), ('2020-01-01', 0), ('2020-01-01', 1)
if (!monotonicity.is_strict)
result.is_last_key = true;
if (!monotonicity.is_positive)
result.direction *= -1;
}
if (!found_function)
current_direction = 0;
return {};
return current_direction;
return result;
}
}
@ -218,8 +217,8 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrderImpl(
if (forbidden_columns.contains(description[desc_pos].column_name))
break;
int current_direction = matchSortDescriptionAndKey(actions[desc_pos]->getActions(), description[desc_pos], sorting_key_columns[key_pos]);
bool is_matched = current_direction && (desc_pos == 0 || current_direction == read_direction);
auto match = matchSortDescriptionAndKey(actions[desc_pos]->getActions(), description[desc_pos], sorting_key_columns[key_pos]);
bool is_matched = match.direction && (desc_pos == 0 || match.direction == read_direction);
if (!is_matched)
{
@ -235,12 +234,15 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrderImpl(
}
if (desc_pos == 0)
read_direction = current_direction;
read_direction = match.direction;
sort_description_for_merging.push_back(description[desc_pos]);
++desc_pos;
++key_pos;
if (match.is_last_key)
break;
}
if (sort_description_for_merging.empty())

View File

@ -56,6 +56,8 @@ struct S3Settings
headers = from.headers;
region = from.region;
server_side_encryption_customer_key_base64 = from.server_side_encryption_customer_key_base64;
use_environment_credentials = from.use_environment_credentials;
use_insecure_imds_request = from.use_insecure_imds_request;
}
};

View File

@ -444,7 +444,8 @@ def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="Script to release a new ClickHouse version, requires `git` and "
"`gh` (github-cli) commands",
"`gh` (github-cli) commands "
"!!! LAUNCH IT ONLY FROM THE MASTER BRANCH !!!",
)
parser.add_argument(
@ -468,10 +469,11 @@ def parse_args() -> argparse.Namespace:
)
parser.add_argument(
"--type",
default="minor",
required=True,
choices=Release.BIG + Release.SMALL,
dest="release_type",
help="a release type, new branch is created only for 'major' and 'minor'",
help="a release type to bump the major.minor.patch version part, "
"new branch is created only for 'major' and 'minor'",
)
parser.add_argument("--with-release-branch", default=True, help=argparse.SUPPRESS)
parser.add_argument(

View File

@ -119,6 +119,44 @@ def test_disks_app_func_cp(started_cluster):
assert "path1" in out
source.exec_in_container(
[
"/usr/bin/clickhouse",
"disks",
"--send-logs",
"--disk",
"test2",
"remove",
"path1",
]
)
source.exec_in_container(
[
"/usr/bin/clickhouse",
"disks",
"--send-logs",
"--disk",
"test1",
"remove",
"path1",
]
)
# alesapin: Why we need list one more time?
# kssenii: it is an assertion that the file is indeed deleted
out = source.exec_in_container(
["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test2", "list", "."]
)
assert "path1" not in out
out = source.exec_in_container(
["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test1", "list", "."]
)
assert "path1" not in out
def test_disks_app_func_ln(started_cluster):
source = cluster.instances["disks_app_test"]

View File

@ -0,0 +1,5 @@
<clickhouse>
<s3>
<use_environment_credentials>1</use_environment_credentials>
</s3>
</clickhouse>

View File

@ -114,6 +114,16 @@ def started_cluster():
"s3_non_default",
with_minio=True,
)
cluster.add_instance(
"s3_with_environment_credentials",
with_minio=True,
env_variables={
"AWS_ACCESS_KEY_ID": "minio",
"AWS_SECRET_ACCESS_KEY": "minio123",
},
main_configs=["configs/use_environment_credentials.xml"],
)
logging.info("Starting cluster...")
cluster.start()
logging.info("Cluster started")
@ -1712,3 +1722,19 @@ def test_ast_auth_headers(started_cluster):
)
assert result.strip() == "1\t2\t3"
def test_environment_credentials(started_cluster):
filename = "test.csv"
bucket = started_cluster.minio_restricted_bucket
instance = started_cluster.instances["s3_with_environment_credentials"]
instance.query(
f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_cache3.jsonl') select * from numbers(100) settings s3_truncate_on_insert=1"
)
assert (
"100"
== instance.query(
f"select count() from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_cache3.jsonl')"
).strip()
)

View File

@ -30,7 +30,7 @@ SELECT intDiv(number, 2) AS k, count(), argMax(toString(number), number) FROM (S
SELECT '*** External aggregation.';
SET max_bytes_before_external_group_by=1000000;
SET max_bytes_before_external_group_by = 1000000;
SET group_by_two_level_threshold = 100000;
SELECT '**** totals_mode = after_having_auto';

View File

@ -8,11 +8,13 @@ ExpressionTransform
(Limit)
Limit
(Sorting)
MergingSortedTransform 2 → 1
(Expression)
ExpressionTransform × 2
(ReadFromMergeTree)
MergeTreeInOrder × 2 0 → 1
FinishSortingTransform
PartialSortingTransform
MergingSortedTransform 2 → 1
(Expression)
ExpressionTransform × 2
(ReadFromMergeTree)
MergeTreeInOrder × 2 0 → 1
2020-10-01 9
2020-10-01 9
2020-10-01 9
@ -23,14 +25,16 @@ ExpressionTransform
(Limit)
Limit
(Sorting)
MergingSortedTransform 2 → 1
(Expression)
ExpressionTransform × 2
(ReadFromMergeTree)
ReverseTransform
MergeTreeReverse 0 → 1
FinishSortingTransform
PartialSortingTransform
MergingSortedTransform 2 → 1
(Expression)
ExpressionTransform × 2
(ReadFromMergeTree)
ReverseTransform
MergeTreeReverse 0 → 1
ReverseTransform
MergeTreeReverse 0 → 1
2020-10-01 9
2020-10-01 9
2020-10-01 9

View File

@ -13,7 +13,7 @@ def main():
sock = socket(AF_INET, SOCK_STREAM)
sock.connect((host, port))
sock.settimeout(5)
sock.settimeout(60)
s = "POST /play HTTP/1.1\r\n"
s += "Host: %s\r\n" % host
s += "Content-type: multipart/form-data\r\n"

View File

@ -0,0 +1,22 @@
4
2022-09-09 12:00:00 0
2022-09-09 12:00:00 0x
2022-09-09 12:00:00 0x
2022-09-09 12:00:00 1
2022-09-09 12:00:00 1x
Prefix sort description: toStartOfMinute(t) ASC
Result sort description: toStartOfMinute(t) ASC, c1 ASC
Prefix sort description: toStartOfMinute(t) ASC
Result sort description: toStartOfMinute(t) ASC
Prefix sort description: negate(a) ASC
Result sort description: negate(a) ASC
Prefix sort description: negate(a) ASC, negate(b) ASC
Result sort description: negate(a) ASC, negate(b) ASC
Prefix sort description: a DESC, negate(b) ASC
Result sort description: a DESC, negate(b) ASC
Prefix sort description: negate(a) ASC, b DESC
Result sort description: negate(a) ASC, b DESC
Prefix sort description: negate(a) ASC
Result sort description: negate(a) ASC, b ASC
Prefix sort description: a ASC
Result sort description: a ASC, negate(b) ASC

View File

@ -0,0 +1,40 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
function explain_sort_description()
{
out=$($CLICKHOUSE_CLIENT --optimize_read_in_order=1 -q "EXPLAIN PLAN actions = 1 $1")
echo "$out" | grep "Prefix sort description:"
echo "$out" | grep "Result sort description:"
}
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic"
$CLICKHOUSE_CLIENT -q "CREATE TABLE t_order_by_monotonic (t DateTime, c1 String) ENGINE = MergeTree ORDER BY (t, c1)
AS SELECT '2022-09-09 12:00:00', toString(number % 2) FROM numbers(2) UNION ALL
SELECT '2022-09-09 12:00:30', toString(number % 2)|| 'x' FROM numbers(3)"
$CLICKHOUSE_CLIENT --optimize_aggregation_in_order=1 -q "SELECT count() FROM
(SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic GROUP BY s, c1)"
$CLICKHOUSE_CLIENT --optimize_read_in_order=1 -q "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s, c1"
explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s, c1"
explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s"
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic"
$CLICKHOUSE_CLIENT -q "CREATE TABLE t_order_by_monotonic (a Int64, b Int64) ENGINE = MergeTree ORDER BY (a, b)"
$CLICKHOUSE_CLIENT -q "INSERT INTO t_order_by_monotonic VALUES (1, 1) (1, 2), (2, 1) (2, 2)"
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a"
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, -b"
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY a DESC, -b"
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, b DESC"
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, b"
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY a, -b"
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic"

View File

@ -0,0 +1,3 @@
SET param_a = 1
SET max_threads = 1, param_a = 1
SET max_threads = 1, param_a = 1

View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -e
format="$CLICKHOUSE_FORMAT"
echo "set param_a = 1" | $format
echo "set max_threads = 1, param_a = 1" | $format
echo "set param_a = 1, max_threads = 1" | $format

View File

@ -0,0 +1 @@
1

View File

@ -0,0 +1,11 @@
drop table if exists a;
drop table if exists b;
create table a (i int, j int) engine Log;
create materialized view b engine Log as select countState(*) from a;
insert into a values (1, 2);
select countMerge(*) from b;
drop table b;
drop table a;

View File

@ -0,0 +1,9 @@
SELECT 1
UNION
SELECT 1
SELECT 2
EXCEPT
SELECT 2
SELECT 3
INTERSECT
SELECT 3

View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -e
format="$CLICKHOUSE_FORMAT"
echo "SELECT 1 UNION SELECT 1" | $format
echo "SELECT 2 EXCEPT SELECT 2" | $format
echo "SELECT 3 INTERSECT SELECT 3" | $format

View File

@ -0,0 +1,2 @@
fred81 4081 Fred Lane 1981-05-18
fred81 4081 Fred Lane 1981-05-18

View File

@ -0,0 +1,11 @@
#!/usr/bin/env bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}')
cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $user_files_path/
${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') LIMIT 1 settings max_memory_usage=1000000000"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') LIMIT 1 settings max_memory_usage=100000000"

Binary file not shown.

View File

@ -1,10 +1,13 @@
v22.9.3.18-stable 2022-09-30
v22.9.2.7-stable 2022-09-23
v22.9.1.2603-stable 2022-09-22
v22.8.6.71-lts 2022-09-30
v22.8.5.29-lts 2022-09-13
v22.8.4.7-lts 2022-08-31
v22.8.3.13-lts 2022-08-29
v22.8.2.11-lts 2022-08-23
v22.8.1.2097-lts 2022-08-18
v22.7.6.74-stable 2022-09-30
v22.7.5.13-stable 2022-08-29
v22.7.4.16-stable 2022-08-23
v22.7.3.5-stable 2022-08-10
@ -27,6 +30,7 @@ v22.4.5.9-stable 2022-05-06
v22.4.4.7-stable 2022-04-29
v22.4.3.3-stable 2022-04-26
v22.4.2.1-stable 2022-04-22
v22.3.13.80-lts 2022-09-30
v22.3.12.19-lts 2022-08-29
v22.3.11.12-lts 2022-08-10
v22.3.10.22-lts 2022-08-03
