Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-10 01:25:21 +00:00)

Commit 847a4e5d7c: Merge remote-tracking branch 'origin/master' into HEAD

3 .github/ISSUE_TEMPLATE/40_bug-report.md vendored
@@ -12,6 +12,9 @@ assignees: ''

**Describe the bug**

A clear and concise description of what is not working as expected.

**Does it reproduce on recent release?**

[The list of releases](https://github.com/ClickHouse/ClickHouse/blob/master/utils/list-versions/version_date.tsv)

**How to reproduce**

* Which ClickHouse server version to use
* Which interface to use, if it matters
@@ -12,6 +12,8 @@

* Removed aggregate functions `timeSeriesGroupSum`, `timeSeriesGroupRateSum` because a friend of mine said they never worked. This fixes [#16869](https://github.com/ClickHouse/ClickHouse/issues/16869). If you have been using these functions successfully, write an email to clickhouse-feedback@yandex-team.com. [#17423](https://github.com/ClickHouse/ClickHouse/pull/17423) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Prohibit `toUnixTimestamp(Date())` (previously it just returned the UInt16 representation of the Date). [#17376](https://github.com/ClickHouse/ClickHouse/pull/17376) ([Azat Khuzhin](https://github.com/azat)).
* Allow using extended integer types (`Int128`, `Int256`, `UInt256`) in `avg` and `avgWeighted` functions. Also allow using different types (integer, decimal, floating point) for the value and for the weight in the `avgWeighted` function. This is a backward-incompatible change: now the `avg` and `avgWeighted` functions always return `Float64` (as documented). Before this change the return type for `Decimal` arguments was also `Decimal`. [#15419](https://github.com/ClickHouse/ClickHouse/pull/15419) ([Mike](https://github.com/myrrc)).
* Expression `toUUID(N)` no longer works. Replace it with `toUUID('00000000-0000-0000-0000-000000000000')`. This change is motivated by the non-obvious results of `toUUID(N)` where N is non-zero.
* SSL certificates with incorrect "key usage" are rejected. In previous versions they used to work.
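Below is a minimal SQL sketch (not part of the original changelog) illustrating two of the behaviour changes listed above, assuming a server that includes them:

``` sql
-- avg over Decimal arguments now returns Float64 rather than Decimal:
SELECT toTypeName(avg(toDecimal32(1.5, 2)));  -- expected: Float64

-- toUnixTimestamp over a Date argument is now prohibited and should raise an error:
SELECT toUnixTimestamp(toDate('2021-01-01'));
```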
#### New Feature
2 contrib/librdkafka vendored
@@ -1 +1 @@
Subproject commit f2f6616419d567c9198aef0d1133a2e9b4f02276
Subproject commit cf11d0aa36d4738f2c9bf4377807661660f1be76
@@ -2,26 +2,25 @@ set(RDKAFKA_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/librdkafka/src)

set(SRCS
${RDKAFKA_SOURCE_DIR}/crc32c.c
${RDKAFKA_SOURCE_DIR}/rdkafka_zstd.c
# ${RDKAFKA_SOURCE_DIR}/lz4.c
# ${RDKAFKA_SOURCE_DIR}/lz4frame.c
# ${RDKAFKA_SOURCE_DIR}/lz4hc.c
${RDKAFKA_SOURCE_DIR}/rdxxhash.c
# ${RDKAFKA_SOURCE_DIR}/regexp.c
${RDKAFKA_SOURCE_DIR}/rdaddr.c
${RDKAFKA_SOURCE_DIR}/rdavl.c
${RDKAFKA_SOURCE_DIR}/rdbuf.c
${RDKAFKA_SOURCE_DIR}/rdcrc32.c
${RDKAFKA_SOURCE_DIR}/rddl.c
${RDKAFKA_SOURCE_DIR}/rdfnv1a.c
${RDKAFKA_SOURCE_DIR}/rdgz.c
${RDKAFKA_SOURCE_DIR}/rdhdrhistogram.c
${RDKAFKA_SOURCE_DIR}/rdkafka.c
${RDKAFKA_SOURCE_DIR}/rdkafka_admin.c # looks optional
${RDKAFKA_SOURCE_DIR}/rdkafka_assignment.c
${RDKAFKA_SOURCE_DIR}/rdkafka_assignor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_aux.c # looks optional
${RDKAFKA_SOURCE_DIR}/rdkafka_background.c
${RDKAFKA_SOURCE_DIR}/rdkafka_broker.c
${RDKAFKA_SOURCE_DIR}/rdkafka_buf.c
${RDKAFKA_SOURCE_DIR}/rdkafka.c
${RDKAFKA_SOURCE_DIR}/rdkafka_cert.c
${RDKAFKA_SOURCE_DIR}/rdkafka_cgrp.c
${RDKAFKA_SOURCE_DIR}/rdkafka_conf.c
@@ -29,7 +28,9 @@ set(SRCS
${RDKAFKA_SOURCE_DIR}/rdkafka_error.c
${RDKAFKA_SOURCE_DIR}/rdkafka_event.c
${RDKAFKA_SOURCE_DIR}/rdkafka_feature.c
${RDKAFKA_SOURCE_DIR}/rdkafka_header.c
${RDKAFKA_SOURCE_DIR}/rdkafka_idempotence.c
${RDKAFKA_SOURCE_DIR}/rdkafka_interceptor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_lz4.c
${RDKAFKA_SOURCE_DIR}/rdkafka_metadata.c
${RDKAFKA_SOURCE_DIR}/rdkafka_metadata_cache.c
@@ -49,20 +50,22 @@ set(SRCS
${RDKAFKA_SOURCE_DIR}/rdkafka_request.c
${RDKAFKA_SOURCE_DIR}/rdkafka_roundrobin_assignor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_sasl.c
# ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_cyrus.c # optionally included below
${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_oauthbearer.c
${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_plain.c
${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_scram.c
# ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_win32.c
${RDKAFKA_SOURCE_DIR}/rdkafka_ssl.c
${RDKAFKA_SOURCE_DIR}/rdkafka_sticky_assignor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_subscription.c
${RDKAFKA_SOURCE_DIR}/rdkafka_timer.c
${RDKAFKA_SOURCE_DIR}/rdkafka_topic.c
${RDKAFKA_SOURCE_DIR}/rdkafka_transport.c
${RDKAFKA_SOURCE_DIR}/rdkafka_interceptor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_header.c
${RDKAFKA_SOURCE_DIR}/rdkafka_txnmgr.c
${RDKAFKA_SOURCE_DIR}/rdkafka_zstd.c
${RDKAFKA_SOURCE_DIR}/rdlist.c
${RDKAFKA_SOURCE_DIR}/rdlog.c
${RDKAFKA_SOURCE_DIR}/rdmap.c
${RDKAFKA_SOURCE_DIR}/rdmurmur2.c
${RDKAFKA_SOURCE_DIR}/rdports.c
${RDKAFKA_SOURCE_DIR}/rdrand.c
@@ -70,10 +73,11 @@ set(SRCS
${RDKAFKA_SOURCE_DIR}/rdstring.c
${RDKAFKA_SOURCE_DIR}/rdunittest.c
${RDKAFKA_SOURCE_DIR}/rdvarint.c
${RDKAFKA_SOURCE_DIR}/rdxxhash.c
# ${RDKAFKA_SOURCE_DIR}/regexp.c
${RDKAFKA_SOURCE_DIR}/snappy.c
${RDKAFKA_SOURCE_DIR}/tinycthread.c
${RDKAFKA_SOURCE_DIR}/tinycthread_extra.c
${RDKAFKA_SOURCE_DIR}/rdgz.c
)

if(${ENABLE_CYRUS_SASL})
@@ -44,9 +44,9 @@
// atomic_64
#define ATOMIC_OP(OP1,OP2,PTR,VAL) __atomic_ ## OP1 ## _ ## OP2(PTR, VAL, __ATOMIC_SEQ_CST)
// parseversion
#define RDKAFKA_VERSION_STR "0.11.4"
#define RDKAFKA_VERSION_STR "1.6.0"
// parseversion
#define MKL_APP_VERSION "0.11.4"
#define MKL_APP_VERSION "1.6.0"
// libdl
#define WITH_LIBDL 1
// WITH_PLUGINS
2 contrib/libuv vendored
@@ -1 +1 @@
Subproject commit 84438304f41d8ea6670ee5409f4d6c63ca784f28
Subproject commit bc14c44b6269c458f2cc7e09eb300f4b64899903
@@ -1,4 +1,6 @@
#!/bin/bash
# shellcheck disable=SC2086

set -eux
set -o pipefail
trap "exit" INT TERM
@@ -20,6 +22,7 @@ function clone
git init
git remote add origin https://github.com/ClickHouse/ClickHouse
git fetch --depth=1 origin "$SHA_TO_TEST"
git fetch --depth=1 origin master # Used to obtain the list of modified or added tests

# If not master, try to fetch pull/.../{head,merge}
if [ "$PR_TO_TEST" != "0" ]
@@ -33,9 +36,6 @@ function clone

function download
{
# wget -O- -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/performance/performance.tgz" \
# | tar --strip-components=1 -zxv

wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"
chmod +x clickhouse
ln -s ./clickhouse ./clickhouse-server
@@ -73,6 +73,17 @@ function watchdog

function fuzz
{
# Obtain the list of newly added tests. They will be fuzzed in a more extreme way than other tests.
cd ch
NEW_TESTS=$(git diff --name-only master | grep -P 'tests/queries/0_stateless/.*\.sql' | sed -r -e 's!^!ch/!' | sort -R)
cd ..
if [[ -n "$NEW_TESTS" ]]
then
NEW_TESTS_OPT="--interleave-queries-file ${NEW_TESTS}"
else
NEW_TESTS_OPT=""
fi

./clickhouse-server --config-file db/config.xml -- --path db 2>&1 | tail -100000 > server.log &
server_pid=$!
kill -0 $server_pid
@@ -85,7 +96,7 @@ function fuzz
# SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric.
# SC2046: Quote this to prevent word splitting. Actually I need word splitting.
# shellcheck disable=SC2012,SC2046
./clickhouse-client --query-fuzzer-runs=1000 --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
./clickhouse-client --query-fuzzer-runs=1000 --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) $NEW_TESTS_OPT \
> >(tail -n 100000 > fuzzer.log) \
2>&1 \
|| fuzzer_exit_code=$?
@@ -107,7 +118,7 @@ function fuzz

case "$stage" in
"")
;&
;& # Did you know? This is "fallthrough" in bash. https://stackoverflow.com/questions/12010686/case-statement-fallthrough
"clone")
time clone
if [ -v FUZZ_LOCAL_SCRIPT ]
@@ -10,6 +10,23 @@ dpkg -i package_folder/clickhouse-client_*.deb
service clickhouse-server start && sleep 5

cd /sqlancer/sqlancer-master
CLICKHOUSE_AVAILABLE=true mvn -Dtest=TestClickHouse test

cp /sqlancer/sqlancer-master/target/surefire-reports/TEST-sqlancer.dbms.TestClickHouse.xml /test_output/result.xml
export TIMEOUT=60
export NUM_QUERIES=1000

( java -jar target/SQLancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere | tee /test_output/TLPWhere.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhere.err
( java -jar target/SQLancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPGroupBy | tee /test_output/TLPGroupBy.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPGroupBy.err
( java -jar target/SQLancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPHaving | tee /test_output/TLPHaving.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPHaving.err
( java -jar target/SQLancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere --oracle TLPGroupBy | tee /test_output/TLPWhereGroupBy.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhereGroupBy.err
( java -jar target/SQLancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPDistinct | tee /test_output/TLPDistinct.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPDistinct.err
( java -jar target/SQLancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPAggregate | tee /test_output/TLPAggregate.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPAggregate.err

service clickhouse-server stop && sleep 10

ls /var/log/clickhouse-server/
tar czf /test_output/logs.tar.gz -C /var/log/clickhouse-server/ .
tail -n 1000 /var/log/clickhouse-server/stderr.log > /test_output/stderr.log
tail -n 1000 /var/log/clickhouse-server/stdout.log > /test_output/stdout.log
tail -n 1000 /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log

ls /test_output
190 docs/en/operations/settings/merge-tree-settings.md Normal file
@@ -0,0 +1,190 @@
# MergeTree tables settings {#merge-tree-settings}

The values of `merge_tree` settings (for all MergeTree tables) can be viewed in the table `system.merge_tree_settings`; they can be overridden in `config.xml` in the `merge_tree` section, or set in the `SETTINGS` section of each table.

Override example in `config.xml`:

``` text
<merge_tree>
    <max_suspicious_broken_parts>5</max_suspicious_broken_parts>
</merge_tree>
```

An example to set in `SETTINGS` for a particular table:

``` sql
CREATE TABLE foo
(
    `A` Int64
)
ENGINE = MergeTree
ORDER BY tuple()
SETTINGS max_suspicious_broken_parts = 500;
```

An example of changing the settings for a specific table with the `ALTER TABLE ... MODIFY SETTING` command:

``` sql
ALTER TABLE foo
    MODIFY SETTING max_suspicious_broken_parts = 100;
```
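As a complement to the examples above, a small sketch (not part of the original page) of how the current values mentioned in the first paragraph can be inspected:

``` sql
-- Inspect a MergeTree-level setting and whether it was changed from the default:
SELECT name, value, changed, description
FROM system.merge_tree_settings
WHERE name = 'max_suspicious_broken_parts';
```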
## parts_to_throw_insert {#parts-to-throw-insert}

If the number of active parts in a single partition exceeds the `parts_to_throw_insert` value, `INSERT` is interrupted with the `Too many parts (N). Merges are processing significantly slower than inserts` exception.

Possible values:

- Any positive integer.

Default value: 300.

To achieve maximum performance of `SELECT` queries, it is necessary to minimize the number of parts processed; see [Merge Tree](../../development/architecture.md#merge-tree).

You can set a larger value (600 or even 1200); this reduces the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. Also, in case of a merge issue (for example, due to insufficient disk space) you will notice it later than you would with the original 300.
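A sketch (not part of the original page) of how to see how close a table is to this limit; the table name `foo` is just the example used earlier:

``` sql
SELECT partition, count() AS active_parts
FROM system.parts
WHERE (database = currentDatabase()) AND (table = 'foo') AND active
GROUP BY partition
ORDER BY active_parts DESC;
```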
## parts_to_delay_insert {#parts-to-delay-insert}

If the number of active parts in a single partition exceeds the `parts_to_delay_insert` value, an `INSERT` is artificially slowed down.

Possible values:

- Any positive integer.

Default value: 150.

ClickHouse artificially executes `INSERT` longer (adds a ‘sleep’) so that the background merge process can merge parts faster than they are added.

## max_delay_to_insert {#max-delay-to-insert}

The value in seconds that is used to calculate the `INSERT` delay when the number of active parts in a single partition exceeds the [parts_to_delay_insert](#parts-to-delay-insert) value.

Possible values:

- Any positive integer.

Default value: 1.

The delay (in milliseconds) for `INSERT` is calculated by the formula:

```code
max_k = parts_to_throw_insert - parts_to_delay_insert
k = 1 + parts_count_in_partition - parts_to_delay_insert
delay_milliseconds = pow(max_delay_to_insert * 1000, k / max_k)
```

For example, if a partition has 299 active parts and parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` is delayed for `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` milliseconds.
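The same formula can be checked directly in SQL (a sketch, using the numbers from the example above):

``` sql
SELECT pow(1 * 1000, (1 + 299 - 150) / (300 - 150)) AS delay_milliseconds;  -- 1000
```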
## max_parts_in_total {#max-parts-in-total}

If the total number of active parts in all partitions of a table exceeds the `max_parts_in_total` value, `INSERT` is interrupted with the `Too many parts (N)` exception.

Possible values:

- Any positive integer.

Default value: 100000.

A large number of parts in a table reduces the performance of ClickHouse queries and increases ClickHouse boot time. Most often this is a consequence of an incorrect design (mistakes when choosing a partitioning strategy, such as partitions that are too small).
## replicated_deduplication_window {#replicated-deduplication-window}

The number of most recently inserted blocks for which ZooKeeper stores hash sums to check for duplicates.

Possible values:

- Any positive integer.
- 0 (disable deduplication)

Default value: 100.

The `INSERT` command creates one or more blocks (parts). When inserting into Replicated tables, ClickHouse writes the hash sums of the created parts into ZooKeeper for [insert deduplication](../../engines/table-engines/mergetree-family/replication/). Hash sums are stored only for the most recent `replicated_deduplication_window` blocks. The oldest hash sums are removed from ZooKeeper.
A large `replicated_deduplication_window` value slows down `INSERT`s because more entries need to be compared.
The hash sum is calculated from the composition of the field names and types and the data of the inserted part (stream of bytes).

## replicated_deduplication_window_seconds {#replicated-deduplication-window-seconds}

The number of seconds after which the hash sums of the inserted blocks are removed from ZooKeeper.

Possible values:

- Any positive integer.

Default value: 604800 (1 week).

Similar to [replicated_deduplication_window](#replicated-deduplication-window), `replicated_deduplication_window_seconds` specifies how long to store hash sums of blocks for insert deduplication. Hash sums older than `replicated_deduplication_window_seconds` are removed from ZooKeeper, even if they are within the most recent `replicated_deduplication_window` blocks.
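A sketch (not part of the original page) of setting both deduplication parameters when creating a replicated table; the ZooKeeper path and the `{replica}` macro are placeholders:

``` sql
CREATE TABLE dedup_example (d Date, x UInt64)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/dedup_example', '{replica}')
ORDER BY x
SETTINGS replicated_deduplication_window = 1000, replicated_deduplication_window_seconds = 86400;
```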
## old_parts_lifetime {#old-parts-lifetime}

The time (in seconds) of storing inactive parts to protect against data loss during spontaneous server reboots.

Possible values:

- Any positive integer.

Default value: 480.

`fsync` is not called for new parts, so for some time new parts exist only in the server's RAM (OS cache). If the server is rebooted spontaneously, new parts can be lost or damaged.
To protect data, the source parts of a merge are not deleted immediately. After merging several parts into a new part, ClickHouse marks the original parts as inactive and deletes them only after `old_parts_lifetime` seconds.
Inactive parts are removed if they are not used by current queries, i.e. if the `refcount` of the part is zero.

During startup ClickHouse checks the integrity of the parts.
If the merged part is damaged, ClickHouse returns the inactive parts to the active list and later merges them again. Then the damaged part is renamed (the `broken_` prefix is added) and moved to the `detached` folder.
If the merged part is not damaged, the original inactive parts are renamed (the `ignored_` prefix is added) and moved to the `detached` folder.

The default `dirty_expire_centisecs` value (a Linux kernel setting) is 30 seconds (the maximum time that written data is stored only in RAM), but under heavy loads on the disk system data can be written much later. Experimentally, a value of 480 seconds was chosen for `old_parts_lifetime`, during which a new part is guaranteed to be written to disk.
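A sketch (not part of the original page) of listing the inactive parts that are still kept on disk; the table name `foo` is again just an example:

``` sql
SELECT name, partition, modification_time
FROM system.parts
WHERE (database = currentDatabase()) AND (table = 'foo') AND NOT active;
```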
## max_bytes_to_merge_at_max_space_in_pool {#max-bytes-to-merge-at-max-space-in-pool}

The maximum total parts size (in bytes) to be merged into one part, if there are enough resources available.
`max_bytes_to_merge_at_max_space_in_pool` roughly corresponds to the maximum possible part size created by an automatic background merge.

Possible values:

- Any positive integer.

Default value: 161061273600 (150 GB).

The merge scheduler periodically analyzes the sizes and number of parts in partitions, and if there are enough free resources in the pool, it starts background merges. Merges are scheduled as long as the total size of the source parts is less than `max_bytes_to_merge_at_max_space_in_pool`.

Merges initiated by `OPTIMIZE FINAL` ignore `max_bytes_to_merge_at_max_space_in_pool` and merge parts only taking into account the available resources (free disk space) until one part remains in the partition.
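A sketch (not part of the original page) of triggering such an unbounded merge manually on the example table:

``` sql
-- Merges the parts of each partition of 'foo' into a single part,
-- ignoring max_bytes_to_merge_at_max_space_in_pool:
OPTIMIZE TABLE foo FINAL;
```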
## max_bytes_to_merge_at_min_space_in_pool {#max-bytes-to-merge-at-min-space-in-pool}

The maximum total part size (in bytes) to be merged into one part, with the minimum available resources in the background pool.

Possible values:

- Any positive integer.

Default value: 1048576 (1 MB)

`max_bytes_to_merge_at_min_space_in_pool` defines the maximum total size of parts which can be merged despite the lack of available disk space (in the pool). This is necessary to reduce the number of small parts and the chance of `Too many parts` errors.
Merges reserve disk space by doubling the total size of the merged parts. Thus, with a small amount of free disk space, a situation may occur where there is free space, but it is already reserved by ongoing large merges, so other merges are unable to start, and the number of small parts grows with every insert.
## merge_max_block_size {#merge-max-block-size}

The number of rows that are read from the merged parts into memory.

Possible values:

- Any positive integer.

Default value: 8192

Merge reads rows from parts in blocks of `merge_max_block_size` rows, then merges and writes the result into a new part. The read block is placed in RAM, so `merge_max_block_size` affects the size of the RAM required for the merge. Thus, merges can consume a large amount of RAM for tables with very wide rows (if the average row size is 100 KB, then when merging 10 parts, (100 KB * 10 * 8192) = ~8 GB of RAM). By decreasing `merge_max_block_size`, you can reduce the amount of RAM required for a merge, but slow the merge down.
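The RAM estimate above can be reproduced with a quick calculation (a sketch, not part of the original page):

``` sql
-- 100 KB average row size * 10 parts being merged * 8192 rows per block:
SELECT formatReadableSize(100 * 1024 * 10 * 8192) AS approximate_merge_ram;  -- about 7.81 GiB
```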
## max_part_loading_threads {#max-part-loading-threads}

The maximum number of threads that read parts when ClickHouse starts.

Possible values:

- Any positive integer.

Default value: auto (number of CPU cores).

During startup ClickHouse reads all parts of all tables (reads files with metadata of parts) to build a list of all parts in memory. In some systems with a large number of parts this process can take a long time, and this time might be shortened by increasing `max_part_loading_threads` (if this process is not CPU and disk I/O bound).

[Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) <!--hide-->
@@ -2134,6 +2134,21 @@ Default value: `1`.

- [ORDER BY Clause](../../sql-reference/statements/select/order-by.md#optimize_read_in_order)

## optimize_aggregation_in_order {#optimize_aggregation_in_order}

Enables [GROUP BY](../../sql-reference/statements/select/group-by.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries for aggregating data in corresponding order in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.

Possible values:

- 0 — `GROUP BY` optimization is disabled.
- 1 — `GROUP BY` optimization is enabled.

Default value: `0`.

**See Also**

- [GROUP BY optimization](../../sql-reference/statements/select/group-by.md#aggregation-in-order)
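A sketch (not part of the original page) of enabling the optimization for a single query; the table `hits` is hypothetical and assumed to have a sorting key starting with `CounterID`:

``` sql
SELECT CounterID, count()
FROM hits
GROUP BY CounterID
SETTINGS optimize_aggregation_in_order = 1;
```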
## mutations_sync {#mutations_sync}

Allows to execute `ALTER TABLE ... UPDATE|DELETE` queries ([mutations](../../sql-reference/statements/alter/index.md#mutations)) synchronously.
@@ -255,6 +255,10 @@ For every different key value encountered, `GROUP BY` calculates a set of aggreg

Aggregation is one of the most important features of a column-oriented DBMS, and thus its implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash table. It has 40+ specializations that are chosen automatically depending on “grouping key” data types.

### GROUP BY Optimization Depending on Table Sorting Key {#aggregation-in-order}

Aggregation can be performed more effectively if a table is sorted by some key and the `GROUP BY` expression contains at least a prefix of the sorting key or injective functions of it. In this case, when a new key is read from the table, the intermediate aggregation result can be finalized and sent to the client. This behaviour is enabled by the [optimize_aggregation_in_order](../../../operations/settings/settings.md#optimize_aggregation_in_order) setting. Such optimization reduces memory usage during aggregation, but in some cases it may slow down query execution.

### GROUP BY in External Memory {#select-group-by-in-external-memory}

You can enable dumping temporary data to the disk to restrict memory usage during `GROUP BY`.
@@ -1995,6 +1995,21 @@ SELECT * FROM a;

- [Reading data optimization](../../sql-reference/statements/select/order-by.md#optimize_read_in_order) in the `ORDER BY` clause

## optimize_aggregation_in_order {#optimize_aggregation_in_order}

Enables or disables the optimization of [SELECT](../../sql-reference/statements/select/index.md) queries with a [GROUP BY](../../sql-reference/statements/select/group-by.md) clause when suitable sorting keys are present. Used with [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.

Possible values:

- 0 — Optimization by the sorting key is disabled.
- 1 — Optimization by the sorting key is enabled.

Default value: `0`.

**See Also**

- [GROUP BY optimization for sorted tables](../../sql-reference/statements/select/group-by.md#aggregation-in-order)

## mutations_sync {#mutations_sync}

Allows executing `ALTER TABLE ... UPDATE|DELETE` queries ([mutations](../../sql-reference/statements/alter/index.md#mutations)) synchronously.
@@ -252,6 +252,10 @@ GROUP BY calculates, for each key value encountered,

Aggregation is one of the most important features of a column-oriented DBMS, and therefore its implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is performed in memory using a hash table. It has more than 40 specializations that are chosen automatically depending on the data types of the grouping key.

### GROUP BY Optimization for Sorted Tables {#aggregation-in-order}

Aggregation of data in sorted tables can be performed more effectively if the `GROUP BY` expression contains at least a prefix of the sorting key or an injective function of it. In such cases, when a new sorting-key value is read from the table, the intermediate aggregation result is finalized and sent to the client. To enable this query execution mode, use the [optimize_aggregation_in_order](../../../operations/settings/settings.md#optimize_aggregation_in_order) setting. Such optimization saves memory during aggregation, but in some cases it may increase query execution time.

### GROUP BY in External Memory {#select-group-by-in-external-memory}

You can enable dumping temporary data to disk to limit memory consumption during `GROUP BY`.
@@ -141,6 +141,8 @@ private:

/// If not empty, queries will be read from these files
std::vector<std::string> queries_files;
/// If not empty, run queries from these files before processing every file from 'queries_files'.
std::vector<std::string> interleave_queries_files;

std::unique_ptr<Connection> connection; /// Connection to DB.
String full_query; /// Current query as it was given to the client.
@@ -802,13 +804,22 @@ private:

if (!queries_files.empty())
{
for (const auto & queries_file : queries_files)
auto process_file = [&](const std::string & file)
{
connection->setDefaultDatabase(connection_parameters.default_database);
ReadBufferFromFile in(queries_file);
ReadBufferFromFile in(file);
readStringUntilEOF(text, in);
if (!processMultiQuery(text))
break;
return processMultiQuery(text);
};

for (const auto & queries_file : queries_files)
{
for (const auto & interleave_file : interleave_queries_files)
if (!process_file(interleave_file))
return;

if (!process_file(queries_file))
return;
}
return;
}
@@ -1041,6 +1052,7 @@ private:

continue;
}

return true;
}

@@ -2457,7 +2469,9 @@ public:
("highlight", po::value<bool>()->default_value(true), "enable or disable basic syntax highlight in interactive command line")
("log-level", po::value<std::string>(), "client log level")
("server_logs_file", po::value<std::string>(), "put server logs into specified file")
("query-fuzzer-runs", po::value<int>()->default_value(0), "query fuzzer runs")
("query-fuzzer-runs", po::value<int>()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.")
("interleave-queries-file", po::value<std::vector<std::string>>()->multitoken(),
"file path with queries to execute before every file from 'queries-file'; multiple files can be specified (--queries-file file1 file2...); this is needed to enable more aggressive fuzzing of newly added tests (see 'query-fuzzer-runs' option)")
("opentelemetry-traceparent", po::value<std::string>(), "OpenTelemetry traceparent header as described by W3C Trace Context recommendation")
("opentelemetry-tracestate", po::value<std::string>(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation")
("history_file", po::value<std::string>(), "path to history file")
@@ -2572,6 +2586,8 @@ public:
config().setString("query", options["query"].as<std::string>());
if (options.count("queries-file"))
queries_files = options["queries-file"].as<std::vector<std::string>>();
if (options.count("interleave-queries-file"))
interleave_queries_files = options["interleave-queries-file"].as<std::vector<std::string>>();
if (options.count("database"))
config().setString("database", options["database"].as<std::string>());
if (options.count("pager"))
@@ -85,8 +85,8 @@ void GTIDSets::update(const GTID & other)
ErrorCodes::LOGICAL_ERROR);
}

/// Try to shirnk Sequence interval.
GTIDSet::tryShirnk(set, i, current);
/// Try to shrink Sequence interval.
GTIDSet::tryShrink(set, i, current);

/// Sequence, extend the interval.
if (other.seq_no == current.end)
@@ -119,7 +119,7 @@ void GTIDSets::update(const GTID & other)
sets.emplace_back(set);
}

void GTIDSet::tryShirnk(GTIDSet & set, unsigned int i, GTIDSet::Interval & current)
void GTIDSet::tryShrink(GTIDSet & set, unsigned int i, GTIDSet::Interval & current)
{
if (i != set.intervals.size() -1)
{
@@ -27,7 +27,7 @@ public:

void tryMerge(size_t i);

static void tryShirnk(GTIDSet & set, unsigned int i, Interval & current);
static void tryShrink(GTIDSet & set, unsigned int i, Interval & current);
};

class GTIDSets
@@ -262,12 +262,12 @@ int main(int argc, char ** argv)
"20662d71-9d91-11ea-bbc2-0242ac110003:9",
"10662d71-9d91-11ea-bbc2-0242ac110003:6-7,20662d71-9d91-11ea-bbc2-0242ac110003:9"},

{"shirnk-sequence",
{"shrink-sequence",
"10662d71-9d91-11ea-bbc2-0242ac110003:1-3:4-5:7",
"10662d71-9d91-11ea-bbc2-0242ac110003:6",
"10662d71-9d91-11ea-bbc2-0242ac110003:1-7"},

{"shirnk-sequence",
{"shrink-sequence",
"10662d71-9d91-11ea-bbc2-0242ac110003:1-3:4-5:10",
"10662d71-9d91-11ea-bbc2-0242ac110003:8",
"10662d71-9d91-11ea-bbc2-0242ac110003:1-5:8:10"
@@ -95,19 +95,22 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection)
"(Variable_name = 'log_bin' AND upper(Value) = 'ON') "
"OR (Variable_name = 'binlog_format' AND upper(Value) = 'ROW') "
"OR (Variable_name = 'binlog_row_image' AND upper(Value) = 'FULL') "
"OR (Variable_name = 'default_authentication_plugin' AND upper(Value) = 'MYSQL_NATIVE_PASSWORD');";
"OR (Variable_name = 'default_authentication_plugin' AND upper(Value) = 'MYSQL_NATIVE_PASSWORD') "
"OR (Variable_name = 'log_bin_use_v1_row_events' AND upper(Value) = 'OFF');";

MySQLBlockInputStream variables_input(connection, check_query, variables_header, DEFAULT_BLOCK_SIZE, false, true);

Block variables_block = variables_input.read();
if (!variables_block || variables_block.rows() != 4)
if (!variables_block || variables_block.rows() != 5)
{
std::unordered_map<String, String> variables_error_message{
{"log_bin", "log_bin = 'ON'"},
{"binlog_format", "binlog_format='ROW'"},
{"binlog_row_image", "binlog_row_image='FULL'"},
{"default_authentication_plugin", "default_authentication_plugin='mysql_native_password'"}
{"default_authentication_plugin", "default_authentication_plugin='mysql_native_password'"},
{"log_bin_use_v1_row_events", "log_bin_use_v1_row_events='OFF'"}
};

ColumnPtr variable_name_column = variables_block.getByName("Variable_name").column;

for (size_t index = 0; index < variables_block.rows(); ++index)
@@ -10,7 +10,6 @@
#include <Access/AccessFlags.h>
#include <Common/typeid_cast.h>

namespace DB
{

@@ -40,6 +39,7 @@ BlockInputStreamPtr InterpreterExistsQuery::executeImpl()
{
ASTQueryWithTableAndOutput * exists_query;
bool result = false;

if ((exists_query = query_ptr->as<ASTExistsTableQuery>()))
{
if (exists_query->temporary)
@@ -57,8 +57,8 @@ BlockInputStreamPtr InterpreterExistsQuery::executeImpl()
{
String database = context.resolveDatabase(exists_query->database);
context.checkAccess(AccessType::SHOW_TABLES, database, exists_query->table);
auto tbl = DatabaseCatalog::instance().tryGetTable({database, exists_query->table}, context);
result = tbl != nullptr && tbl->isView();
auto table = DatabaseCatalog::instance().tryGetTable({database, exists_query->table}, context);
result = table && table->isView();
}
else if ((exists_query = query_ptr->as<ASTExistsDatabaseQuery>()))
{
@@ -41,6 +41,16 @@ DropQuery::createDropTable(bool detach, bool if_exists, bool temporary, PtrTo<Ta
return query;
}

// static
PtrTo<DropQuery>
DropQuery::createDropView(bool detach, bool if_exists, PtrTo<TableIdentifier> identifier, PtrTo<ClusterClause> cluster)
{
auto query = PtrTo<DropQuery>(new DropQuery(cluster, QueryType::VIEW, {identifier}));
query->detach = detach;
query->if_exists = if_exists;
return query;
}

DropQuery::DropQuery(PtrTo<ClusterClause> cluster, QueryType type, PtrList exprs) : DDLQuery(cluster, exprs), query_type(type)
{
}
@@ -73,6 +83,14 @@ ASTPtr DropQuery::convertToOld() const
query->database = database->getName();
break;
}
case QueryType::VIEW:
{
query->is_view = true;
query->table = get<TableIdentifier>(NAME)->getName();
if (auto database = get<TableIdentifier>(NAME)->getDatabase())
query->database = database->getName();
break;
}
}

convertToOldPartially(query);
@@ -100,6 +118,8 @@ antlrcpp::Any ParseTreeVisitor::visitDropTableStmt(ClickHouseParser::DropTableSt
return DropQuery::createDropTable(!!ctx->DETACH(), !!ctx->EXISTS(), !!ctx->TEMPORARY(), visit(ctx->tableIdentifier()), cluster);
if (ctx->DICTIONARY())
return DropQuery::createDropDictionary(!!ctx->DETACH(), !!ctx->EXISTS(), visit(ctx->tableIdentifier()), cluster);
if (ctx->VIEW())
return DropQuery::createDropView(!!ctx->DETACH(), !!ctx->EXISTS(), visit(ctx->tableIdentifier()), cluster);
__builtin_unreachable();
}
@@ -15,6 +15,8 @@ class DropQuery : public DDLQuery
createDropTable(bool detach, bool if_exists, bool temporary, PtrTo<TableIdentifier> identifier, PtrTo<ClusterClause> cluster);
static PtrTo<DropQuery>
createDropDictionary(bool detach, bool if_exists, PtrTo<TableIdentifier> identifier, PtrTo<ClusterClause> cluster);
static PtrTo<DropQuery>
createDropView(bool detach, bool if_exists, PtrTo<TableIdentifier> identifier, PtrTo<ClusterClause> cluster);

ASTPtr convertToOld() const override;

@@ -29,6 +31,7 @@ class DropQuery : public DDLQuery
DATABASE,
DICTIONARY,
TABLE,
VIEW,
};

const QueryType query_type;
@@ -10,31 +10,51 @@
namespace DB::AST
{

ExistsQuery::ExistsQuery(QueryType type, bool temporary_, PtrTo<TableIdentifier> identifier)
: Query{identifier}, query_type(type), temporary(temporary_)
ExistsQuery::ExistsQuery(QueryType type, bool temporary_, PtrList exprs)
: Query(exprs), query_type(type), temporary(temporary_)
{
}

// static
PtrTo<ExistsQuery> ExistsQuery::createTable(QueryType type, bool temporary, PtrTo<TableIdentifier> identifier)
{
return PtrTo<ExistsQuery>(new ExistsQuery(type, temporary, {identifier}));
}

// static
PtrTo<ExistsQuery> ExistsQuery::createDatabase(PtrTo<DatabaseIdentifier> identifier)
{
return PtrTo<ExistsQuery>(new ExistsQuery(QueryType::DATABASE, false, {identifier}));
}

ASTPtr ExistsQuery::convertToOld() const
{
std::shared_ptr<ASTQueryWithTableAndOutput> query;

switch(query_type)
{
case QueryType::DATABASE:
query = std::make_shared<ASTExistsDatabaseQuery>();
tryGetIdentifierNameInto(get<DatabaseIdentifier>(IDENTIFIER)->convertToOld(), query->database);
return query;

case QueryType::DICTIONARY:
query = std::make_shared<ASTExistsDictionaryQuery>();
break;
case QueryType::TABLE:
query = std::make_shared<ASTExistsTableQuery>();
query->temporary = temporary;
break;
case QueryType::VIEW:
query = std::make_shared<ASTExistsViewQuery>();
break;
}

// FIXME: this won't work if table doesn't exist
auto table_id = getTableIdentifier(get(TABLE)->convertToOld());
auto table_id = getTableIdentifier(get<TableIdentifier>(IDENTIFIER)->convertToOld());
query->database = table_id.database_name;
query->table = table_id.table_name;
query->uuid = table_id.uuid;
query->temporary = temporary;

return query;
}
@@ -46,10 +66,22 @@ namespace DB

using namespace AST;

antlrcpp::Any ParseTreeVisitor::visitExistsStmt(ClickHouseParser::ExistsStmtContext *ctx)
antlrcpp::Any ParseTreeVisitor::visitExistsTableStmt(ClickHouseParser::ExistsTableStmtContext *ctx)
{
auto type = ctx->TABLE() ? ExistsQuery::QueryType::TABLE : ExistsQuery::QueryType::DICTIONARY;
return std::make_shared<ExistsQuery>(type, !!ctx->TEMPORARY(), visit(ctx->tableIdentifier()));
ExistsQuery::QueryType type;
if (ctx->DICTIONARY())
type = ExistsQuery::QueryType::DICTIONARY;
else if (ctx->VIEW())
type = ExistsQuery::QueryType::VIEW;
else // Query 'EXISTS <table_name>' is interpreted as 'EXISTS TABLE <table_name>'
type = ExistsQuery::QueryType::TABLE;

return ExistsQuery::createTable(type, !!ctx->TEMPORARY(), visit(ctx->tableIdentifier()));
}

antlrcpp::Any ParseTreeVisitor::visitExistsDatabaseStmt(ClickHouseParser::ExistsDatabaseStmtContext *ctx)
{
return ExistsQuery::createDatabase(visit(ctx->databaseIdentifier()));
}

}
@@ -13,16 +13,21 @@ class ExistsQuery : public Query
{
DICTIONARY,
TABLE,
VIEW,
DATABASE,
};

ExistsQuery(QueryType type, bool temporary, PtrTo<TableIdentifier> identifier);
static PtrTo<ExistsQuery> createTable(QueryType type, bool temporary, PtrTo<TableIdentifier> identifier);
static PtrTo<ExistsQuery> createDatabase(PtrTo<DatabaseIdentifier> identifier);

ExistsQuery(QueryType type, bool temporary, PtrList exprs);

ASTPtr convertToOld() const override;

private:
enum ChildIndex : UInt8
{
TABLE = 0, // TableIdentifier
IDENTIFIER = 0, // DatabaseIdentifier or TableIdentifier
};

const QueryType query_type;
File diff suppressed because it is too large
@@ -173,12 +173,15 @@ describeStmt: (DESCRIBE | DESC) TABLE? tableExpr;

dropStmt
: (DETACH | DROP) DATABASE (IF EXISTS)? databaseIdentifier clusterClause? # DropDatabaseStmt
| (DETACH | DROP) (DICTIONARY | TEMPORARY? TABLE) (IF EXISTS)? tableIdentifier clusterClause? (NO DELAY)? # DropTableStmt
| (DETACH | DROP) (DICTIONARY | TEMPORARY? TABLE | VIEW) (IF EXISTS)? tableIdentifier clusterClause? (NO DELAY)? # DropTableStmt
;

// EXISTS statement

existsStmt: EXISTS (DICTIONARY | TEMPORARY? TABLE)? tableIdentifier;
existsStmt
: EXISTS DATABASE databaseIdentifier # ExistsDatabaseStmt
| EXISTS (DICTIONARY | TEMPORARY? TABLE | VIEW)? tableIdentifier # ExistsTableStmt
;

// EXPLAIN statement
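For reference, a sketch (not part of the diff) of the kinds of statements the extended dropStmt and existsStmt rules are meant to accept; the object names are placeholders:

``` sql
EXISTS DATABASE db_example;
EXISTS VIEW view_example;
DROP VIEW IF EXISTS view_example;
```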
File diff suppressed because it is too large
@@ -164,7 +164,9 @@ public:

virtual antlrcpp::Any visitDropTableStmt(ClickHouseParser::DropTableStmtContext *context) = 0;

virtual antlrcpp::Any visitExistsStmt(ClickHouseParser::ExistsStmtContext *context) = 0;
virtual antlrcpp::Any visitExistsDatabaseStmt(ClickHouseParser::ExistsDatabaseStmtContext *context) = 0;

virtual antlrcpp::Any visitExistsTableStmt(ClickHouseParser::ExistsTableStmtContext *context) = 0;

virtual antlrcpp::Any visitExplainStmt(ClickHouseParser::ExplainStmtContext *context) = 0;
@@ -136,7 +136,8 @@ public:
antlrcpp::Any visitTtlExpr(ClickHouseParser::TtlExprContext * ctx) override;

// ExistsQuery
antlrcpp::Any visitExistsStmt(ClickHouseParser::ExistsStmtContext * ctx) override;
antlrcpp::Any visitExistsTableStmt(ClickHouseParser::ExistsTableStmtContext * ctx) override;
antlrcpp::Any visitExistsDatabaseStmt(ClickHouseParser::ExistsDatabaseStmtContext * ctx) override;

// ExplainQuery
antlrcpp::Any visitExplainStmt(ClickHouseParser::ExplainStmtContext * ctx) override;
@@ -76,7 +76,6 @@ struct ASTDescribeQueryExistsQueryIDAndQueryNames
static constexpr auto QueryTemporary = "DESCRIBE TEMPORARY TABLE";
};

using ASTExistsDatabaseQuery = ASTQueryWithTableAndOutputImpl<ASTExistsDatabaseQueryIDAndQueryNames>;
using ASTExistsTableQuery = ASTQueryWithTableAndOutputImpl<ASTExistsTableQueryIDAndQueryNames>;
using ASTExistsViewQuery = ASTQueryWithTableAndOutputImpl<ASTExistsViewQueryIDAndQueryNames>;
using ASTExistsDictionaryQuery = ASTQueryWithTableAndOutputImpl<ASTExistsDictionaryQueryIDAndQueryNames>;
@@ -84,6 +83,16 @@ using ASTShowCreateTableQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateTabl
using ASTShowCreateViewQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateViewQueryIDAndQueryNames>;
using ASTShowCreateDictionaryQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreateDictionaryQueryIDAndQueryNames>;

class ASTExistsDatabaseQuery : public ASTQueryWithTableAndOutputImpl<ASTExistsDatabaseQueryIDAndQueryNames>
{
protected:
void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << ASTExistsDatabaseQueryIDAndQueryNames::Query
<< " " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(database);
}
};

class ASTShowCreateDatabaseQuery : public ASTQueryWithTableAndOutputImpl<ASTShowCreateDatabaseQueryIDAndQueryNames>
{
protected:
@@ -4652,7 +4652,9 @@ bool StorageReplicatedMergeTree::waitForTableReplicaToProcessLogEntry(
bool stop_waiting_non_active = !wait_for_non_active && !getZooKeeper()->exists(table_zookeeper_path + "/replicas/" + replica + "/is_active");
return stop_waiting_itself || stop_waiting_non_active;
};
constexpr auto event_wait_timeout_ms = 1000;

/// Don't recheck ZooKeeper too often
constexpr auto event_wait_timeout_ms = 3000;

if (startsWith(entry.znode_name, "log-"))
{
@@ -4673,10 +4675,11 @@ bool StorageReplicatedMergeTree::waitForTableReplicaToProcessLogEntry(
if (!log_pointer.empty() && parse<UInt64>(log_pointer) > log_index)
break;

if (wait_for_non_active)
event->wait();
else
event->tryWait(event_wait_timeout_ms);
/// Wait with timeout because we can be already shut down, but not dropped.
/// So the log_pointer node will exist, but we will never update it because all background threads have already stopped.
/// It can lead to a hung query, because the table drop query can wait for some query (alter, optimize, etc.) which called this method,
/// but that query will never finish because the drop has already shut down the table.
event->tryWait(event_wait_timeout_ms);
}
}
else if (startsWith(entry.znode_name, "queue-"))
@@ -4721,10 +4724,11 @@ bool StorageReplicatedMergeTree::waitForTableReplicaToProcessLogEntry(
if (!log_pointer_new.empty() && parse<UInt64>(log_pointer_new) > log_index)
break;

if (wait_for_non_active)
event->wait();
else
event->tryWait(event_wait_timeout_ms);
/// Wait with timeout because we can be already shut down, but not dropped.
/// So the log_pointer node will exist, but we will never update it because all background threads have already stopped.
/// It can lead to a hung query, because the table drop query can wait for some query (alter, optimize, etc.) which called this method,
/// but that query will never finish because the drop has already shut down the table.
event->tryWait(event_wait_timeout_ms);
}
}
}
@@ -28,7 +28,7 @@ EXISTS t_01048; -- Does not work for temporary tables. Maybe have to fix.
EXISTS TABLE t_01048;
EXISTS DICTIONARY t_01048;

CREATE DICTIONARY db_01048.t_01048 (k UInt64, v String) PRIMARY KEY k LAYOUT(FLAT()) SOURCE(HTTP(URL 'http://example.test/' FORMAT TSV)) LIFETIME(1000);
CREATE DICTIONARY db_01048.t_01048 (k UInt64, v String) PRIMARY KEY k LAYOUT(FLAT()) SOURCE(HTTP(URL 'http://example.test/' FORMAT 'TSV')) LIFETIME(1000);
EXISTS db_01048.t_01048;
EXISTS TABLE db_01048.t_01048; -- Dictionaries are tables as well. But not all tables are dictionaries.
EXISTS DICTIONARY db_01048.t_01048;
@@ -0,0 +1,2 @@
202101
202101_0_0_0 1 2021-01-01 some
@@ -0,0 +1,32 @@
DROP TABLE IF EXISTS test_01640;
DROP TABLE IF EXISTS restore_01640;

CREATE TABLE test_01640(i Int64, d Date, s String)
ENGINE = ReplicatedMergeTree('/clickhouse/{shard}/tables/test_01640','{replica}')
PARTITION BY toYYYYMM(d) ORDER BY i;

insert into test_01640 values (1, '2021-01-01','some');

CREATE TABLE restore_01640(i Int64, d Date, s String)
ENGINE = ReplicatedMergeTree('/clickhouse/{shard}/tables/restore_01640','{replica}')
PARTITION BY toYYYYMM(d) ORDER BY i;

ALTER TABLE restore_01640 FETCH PARTITION tuple(toYYYYMM(toDate('2021-01-01')))
FROM '/clickhouse/{shard}/tables/test_01640';

SELECT partition_id
FROM system.detached_parts
WHERE (table = 'restore_01640') AND (database = currentDatabase());

ALTER TABLE restore_01640 ATTACH PARTITION tuple(toYYYYMM(toDate('2021-01-01')));

SELECT partition_id
FROM system.detached_parts
WHERE (table = 'restore_01640') AND (database = currentDatabase());

SELECT _part, * FROM restore_01640;

DROP TABLE test_01640;
DROP TABLE restore_01640;
@@ -0,0 +1 @@
2
1 tests/queries/0_stateless/01655_agg_if_nullable.sql Normal file
@@ -0,0 +1 @@
SELECT sumIf(toNullable(1), 1) FROM remote('127.0.0.{1,2}', system.one);
@@ -185,4 +185,6 @@
01639_distributed_sync_insert_zero_rows
01644_distributed_async_insert_fsync_smoke
01552_impl_aggfunc_cloneresize
01650_fetch_patition_with_macro_in_zk_path
01651_bugs_from_15889
01655_agg_if_nullable
@@ -162,7 +162,6 @@
"01039_test_setting_parse",
"01042_system_reload_dictionary_reloads_completely",
"01045_dictionaries_restrictions",
"01048_exists_query",
"01055_compact_parts_1",
"01056_create_table_as",
"01066_bit_count",
@@ -197,7 +196,6 @@
"01190_full_attach_syntax",
"01191_rename_dictionary",
"01192_rename_database_zookeeper",
"01210_drop_view",
"01213_alter_rename_column",
"01232_untuple",
"01244_optimize_distributed_group_by_sharding_key",
@@ -209,7 +207,6 @@
"01269_create_with_null",
"01271_show_privileges",
"01272_offset_without_limit",
"01275_parallel_mv",
"01277_alter_rename_column_constraint_zookeeper",
"01278_min_insert_block_size_rows_for_materialized_views",
"01280_min_map_max_map",
@@ -1,4 +1,2 @@
# looks like a bug in clang-11 thread sanitizer, detects normal data race with random FD in this method
race:DB::LazyPipeFDs::close
# races in openSSL https://github.com/openssl/openssl/issues/11974
fun:evp_cipher_cache_constants