This commit is contained in:
myrrc 2020-12-18 18:48:37 +03:00
parent 493ee6732b
commit 0e235d35f8
416 changed files with 10085 additions and 2626 deletions

2
.gitmodules vendored
View File

@ -183,7 +183,7 @@
url = https://github.com/kthohr/stats.git
[submodule "contrib/krb5"]
path = contrib/krb5
url = https://github.com/krb5/krb5
url = https://github.com/ClickHouse-Extras/krb5
[submodule "contrib/cyrus-sasl"]
path = contrib/cyrus-sasl
url = https://github.com/cyrusimap/cyrus-sasl

View File

@ -1,3 +1,126 @@
## ClickHouse release 20.12
### ClickHouse release v20.12.3.3-stable, 2020-12-13
#### Backward Incompatible Change
* Enable `use_compact_format_in_distributed_parts_names` by default (see the documentation for the reference). [#16728](https://github.com/ClickHouse/ClickHouse/pull/16728) ([Azat Khuzhin](https://github.com/azat)).
* Accept user settings related to file formats (e.g. `format_csv_delimiter`) in the `SETTINGS` clause when creating a table that uses `File` engine, and use these settings in all `INSERT`s and `SELECT`s. The file format settings changed in the current user session, or in the `SETTINGS` clause of a DML query itself, no longer affect the query. [#16591](https://github.com/ClickHouse/ClickHouse/pull/16591) ([Alexander Kuzmenkov](https://github.com/akuzm)).
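A minimal sketch of the new behavior from the item above, assuming a hypothetical `File`-engine table: format settings given in the `SETTINGS` clause of `CREATE TABLE` are then used by all `INSERT`s and `SELECT`s on it.
``` sql
CREATE TABLE csv_table (a String, b UInt32)
ENGINE = File(CSV)
SETTINGS format_csv_delimiter = ';';
```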
#### New Feature
* Add `*.xz` compression/decompression support. It enables using `*.xz` files in the `file()` function (a brief sketch appears after this list). This closes [#8828](https://github.com/ClickHouse/ClickHouse/issues/8828). [#16578](https://github.com/ClickHouse/ClickHouse/pull/16578) ([Abi Palagashvili](https://github.com/fibersel)).
* Introduce the query `ALTER TABLE ... DROP|DETACH PART 'part_name'`. [#15511](https://github.com/ClickHouse/ClickHouse/pull/15511) ([nvartolomei](https://github.com/nvartolomei)).
* Added new ALTER UPDATE/DELETE IN PARTITION syntax. [#13403](https://github.com/ClickHouse/ClickHouse/pull/13403) ([Vladimir Chebotarev](https://github.com/excitoon)).
* Allow formatting named tuples as JSON objects when using JSON input/output formats, controlled by the `output_format_json_named_tuples_as_objects` setting, disabled by default. [#17175](https://github.com/ClickHouse/ClickHouse/pull/17175) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* Add the possibility to input an enum value as its id in TSV and CSV formats by default. [#16834](https://github.com/ClickHouse/ClickHouse/pull/16834) ([Kruglov Pavel](https://github.com/Avogar)).
* Add COLLATE support for Nullable, LowCardinality, Array and Tuple, where the nested type is String. Also refactor the code associated with collations in ColumnString.cpp. [#16273](https://github.com/ClickHouse/ClickHouse/pull/16273) ([Kruglov Pavel](https://github.com/Avogar)).
* New `tcpPort` function returns the TCP port this server listens on. [#17134](https://github.com/ClickHouse/ClickHouse/pull/17134) ([Ivan](https://github.com/abyss7)).
* Add new math functions: `acosh`, `asinh`, `atan2`, `atanh`, `cosh`, `hypot`, `log1p`, `sinh`. [#16636](https://github.com/ClickHouse/ClickHouse/pull/16636) ([Konstantin Malanchev](https://github.com/hombit)).
* Possibility to distribute the merges between different replicas. Introduces the `execute_merges_on_single_replica_time_threshold` mergetree setting. [#16424](https://github.com/ClickHouse/ClickHouse/pull/16424) ([filimonov](https://github.com/filimonov)).
* Add setting `aggregate_functions_null_for_empty` for SQL standard compatibility. This option will rewrite all aggregate functions in a query, adding -OrNull suffix to them. Implements [10273](https://github.com/ClickHouse/ClickHouse/issues/10273). [#16123](https://github.com/ClickHouse/ClickHouse/pull/16123) ([flynn](https://github.com/ucasFL)).
* Updated DateTime, DateTime64 parsing to accept string Date literal format. [#16040](https://github.com/ClickHouse/ClickHouse/pull/16040) ([Maksim Kita](https://github.com/kitaisreal)).
* Make it possible to change the path to history file in `clickhouse-client` using the `--history_file` parameter. [#15960](https://github.com/ClickHouse/ClickHouse/pull/15960) ([Maksim Kita](https://github.com/kitaisreal)).
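A minimal sketch of a few of the new features above; the file, table, and part names are hypothetical.
``` sql
-- *.xz support in the file() table function
SELECT count() FROM file('hits.tsv.xz', 'TSV', 'id UInt64');

-- New ALTER TABLE ... DETACH PART syntax
ALTER TABLE visits DETACH PART '201901_2_2_0';

-- tcpPort() and some of the new math functions
SELECT tcpPort(), acosh(1.5), log1p(0.5);
```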
#### Bug Fix
* Fix the issue when the server can stop accepting connections in very rare cases. [#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) ([Amos Bird](https://github.com/amosbird)).
* Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([tavplubix](https://github.com/tavplubix)).
* Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)).
* Fix incorrect initialization of `max_compress_block_size` in MergeTreeWriterSettings with `min_compress_block_size`. [#17833](https://github.com/ClickHouse/ClickHouse/pull/17833) ([flynn](https://github.com/ucasFL)).
* Exception message about max table size to drop was displayed incorrectly. [#17764](https://github.com/ClickHouse/ClickHouse/pull/17764) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed possible segfault when there is not enough space when inserting into `Distributed` table. [#17737](https://github.com/ClickHouse/ClickHouse/pull/17737) ([tavplubix](https://github.com/tavplubix)).
* Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)).
* It might be determined incorrectly whether a cluster is circular- (cross-) replicated or not when executing an `ON CLUSTER` query, due to a race condition when `pool_size` > 1. It's fixed. [#17640](https://github.com/ClickHouse/ClickHouse/pull/17640) ([tavplubix](https://github.com/tavplubix)).
* Exception `fmt::v7::format_error` can be logged in background for MergeTree tables. This fixes [#17613](https://github.com/ClickHouse/ClickHouse/issues/17613). [#17615](https://github.com/ClickHouse/ClickHouse/pull/17615) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* When clickhouse-client is used in interactive mode with multiline queries, a single-line comment was erroneously extended to the end of the query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix alter query hang when the corresponding mutation was killed on a different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)).
* Fix issue when mark cache size was underestimated by ClickHouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)).
* Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)).
* Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix `toInt256(inf)` stack overflow. Int256 is an experimental feature. Closed [#17235](https://github.com/ClickHouse/ClickHouse/issues/17235). [#17257](https://github.com/ClickHouse/ClickHouse/pull/17257) ([flynn](https://github.com/ucasFL)).
* Fix possible `Unexpected packet Data received from client` error logged for Distributed queries with `LIMIT`. [#17254](https://github.com/ClickHouse/ClickHouse/pull/17254) ([Azat Khuzhin](https://github.com/azat)).
* Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246). [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)).
* Fix possible wrong index analysis when the types of the index comparison are different. This fixes [#17122](https://github.com/ClickHouse/ClickHouse/issues/17122). [#17145](https://github.com/ClickHouse/ClickHouse/pull/17145) ([Amos Bird](https://github.com/amosbird)).
* Fix ColumnConst comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)).
* Multiple fixes for MaterializeMySQL (experimental feature). Fixes [#16923](https://github.com/ClickHouse/ClickHouse/issues/16923). Fixes [#15883](https://github.com/ClickHouse/ClickHouse/issues/15883). Fix MaterializeMySQL SYNC failure when modifying the MySQL `binlog_checksum`. [#17091](https://github.com/ClickHouse/ClickHouse/pull/17091) ([Winter Zhang](https://github.com/zhang2014)).
* Fix bug when `ON CLUSTER` queries may hang forever for non-leader `ReplicatedMergeTree` tables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)).
* Fixed crash on `CREATE TABLE ... AS some_table` query when `some_table` was created `AS table_function()`. Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944). [#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([tavplubix](https://github.com/tavplubix)).
* Fix unfinished implementation of the function `fuzzBits`. Related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)).
* Fix LLVM's libunwind in the case when CFA register is RAX. This is the [bug](https://bugs.llvm.org/show_bug.cgi?id=48186) in [LLVM's libunwind](https://github.com/llvm/llvm-project/tree/master/libunwind). We already have workarounds for this bug. [#17046](https://github.com/ClickHouse/ClickHouse/pull/17046) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Avoid unnecessary network errors for remote queries which may be cancelled while execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)).
* Fix `optimize_distributed_group_by_sharding_key` setting (that is disabled by default) for query with OFFSET only. [#16996](https://github.com/ClickHouse/ClickHouse/pull/16996) ([Azat Khuzhin](https://github.com/azat)).
* Fix for Merge tables over Distributed tables with JOIN. [#16993](https://github.com/ClickHouse/ClickHouse/pull/16993) ([Azat Khuzhin](https://github.com/azat)).
* Fixed wrong result in big integers (128, 256 bit) when casting from double. Big integers support is experimental. [#16986](https://github.com/ClickHouse/ClickHouse/pull/16986) ([Mike](https://github.com/myrrc)).
* Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when a `SELECT` has a `WHERE` expression on the altered column and the alter is not finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)).
* Blame info was not calculated correctly in `clickhouse-git-import`. [#16959](https://github.com/ClickHouse/ClickHouse/pull/16959) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix order by optimization with monotonous functions. Fixes [#16107](https://github.com/ClickHouse/ClickHouse/issues/16107). [#16956](https://github.com/ClickHouse/ClickHouse/pull/16956) ([Anton Popov](https://github.com/CurtizJ)).
* Fix optimization of group by with enabled setting `optimize_aggregators_of_group_by_keys` and joins. Fixes [#12604](https://github.com/ClickHouse/ClickHouse/issues/12604). [#16951](https://github.com/ClickHouse/ClickHouse/pull/16951) ([Anton Popov](https://github.com/CurtizJ)).
* Fix possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix strange code in InterpreterShowAccessQuery. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([tavplubix](https://github.com/tavplubix)).
* Prevent ClickHouse server crashes when using the function `timeSeriesGroupSum`. The function is removed from newer ClickHouse releases. [#16865](https://github.com/ClickHouse/ClickHouse/pull/16865) ([filimonov](https://github.com/filimonov)).
* Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803) . cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)).
* If no memory can be allocated while writing table metadata on disk, broken metadata file can be written. [#16772](https://github.com/ClickHouse/ClickHouse/pull/16772) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix trivial query optimization with partition predicate. [#16767](https://github.com/ClickHouse/ClickHouse/pull/16767) ([Azat Khuzhin](https://github.com/azat)).
* Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)).
* Return the number of affected rows for INSERT queries via MySQL protocol. Previously ClickHouse always returned 0; it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)).
* Fix remote query failure when using the `If` suffix aggregate function combinator. Fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574). Fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231). [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)).
* Fix inconsistent behavior caused by `select_sequential_consistency` for optimized trivial count query and system.tables. [#16309](https://github.com/ClickHouse/ClickHouse/pull/16309) ([Hao Chen](https://github.com/haoch)).
#### Improvement
* Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm. [#16895](https://github.com/ClickHouse/ClickHouse/pull/16895) ([Anton Popov](https://github.com/CurtizJ)).
* Enable compact format of directories for asynchronous sends in Distributed tables: `use_compact_format_in_distributed_parts_names` is set to 1 by default. [#16788](https://github.com/ClickHouse/ClickHouse/pull/16788) ([Azat Khuzhin](https://github.com/azat)).
* Abort multipart upload if no data was written to S3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)).
* Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)).
* Mask password in data_path in the system.distribution_queue. [#16727](https://github.com/ClickHouse/ClickHouse/pull/16727) ([Azat Khuzhin](https://github.com/azat)).
* Throw an error when a column transformer replaces a non-existing column. [#16183](https://github.com/ClickHouse/ClickHouse/pull/16183) ([hexiaoting](https://github.com/hexiaoting)).
* Turn off parallel parsing when there is not enough memory for all threads to work simultaneously. Also, there could be exceptions like "Memory limit exceeded" when somebody tries to insert extremely huge rows (> `min_chunk_bytes_for_parallel_parsing`), because each piece to parse has to be an independent set of strings (one or more). [#16721](https://github.com/ClickHouse/ClickHouse/pull/16721) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)).
* Correct grammar in error message in JSONEachRow, JSONCompactEachRow, and RegexpRow input formats. [#17205](https://github.com/ClickHouse/ClickHouse/pull/17205) ([nico piderman](https://github.com/sneako)).
* Set default `host` and `port` parameters for `SOURCE(CLICKHOUSE(...))` to current instance and set default `user` value to `'default'`. [#16997](https://github.com/ClickHouse/ClickHouse/pull/16997) ([vdimir](https://github.com/vdimir)).
* Throw an informative error message when doing `ATTACH/DETACH TABLE <DICTIONARY>`. Before this PR, `detach table <dict>` worked but led to ill-formed in-memory metadata. [#16885](https://github.com/ClickHouse/ClickHouse/pull/16885) ([Amos Bird](https://github.com/amosbird)).
* Add `cutToFirstSignificantSubdomainWithWWW()`. [#16845](https://github.com/ClickHouse/ClickHouse/pull/16845) ([Azat Khuzhin](https://github.com/azat)).
* Server refuses to start up with an exception message if a wrong config is given (`metric_log`.`collect_interval_milliseconds` is missing). [#16815](https://github.com/ClickHouse/ClickHouse/pull/16815) ([Ivan](https://github.com/abyss7)).
* Better exception message when configuration for distributed DDL is absent. This fixes [#5075](https://github.com/ClickHouse/ClickHouse/issues/5075). [#16769](https://github.com/ClickHouse/ClickHouse/pull/16769) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Usability improvement: better suggestions in syntax error message when `CODEC` expression is misplaced in `CREATE TABLE` query. This fixes [#12493](https://github.com/ClickHouse/ClickHouse/issues/12493). [#16768](https://github.com/ClickHouse/ClickHouse/pull/16768) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Remove empty directories for async INSERT at start of Distributed engine. [#16729](https://github.com/ClickHouse/ClickHouse/pull/16729) ([Azat Khuzhin](https://github.com/azat)).
* Workaround for using S3 with an nginx server as a proxy. Nginx currently does not accept URLs with an empty path like `http://domain.com?delete`, but vanilla aws-sdk-cpp produces this kind of URL. This commit uses a patched aws-sdk-cpp version, which makes URLs with "/" as the path in such cases, like `http://domain.com/?delete`. [#16709](https://github.com/ClickHouse/ClickHouse/pull/16709) ([ianton-ru](https://github.com/ianton-ru)).
* Allow `reinterpretAs*` functions to work for integers and floats of the same size (see the sketch after this list). Implements [16640](https://github.com/ClickHouse/ClickHouse/issues/16640). [#16657](https://github.com/ClickHouse/ClickHouse/pull/16657) ([flynn](https://github.com/ucasFL)).
* Now, `<auxiliary_zookeepers>` configuration can be changed in `config.xml` and reloaded without server startup. [#16627](https://github.com/ClickHouse/ClickHouse/pull/16627) ([Amos Bird](https://github.com/amosbird)).
* Support SNI in https connections to remote resources. This will allow to connect to Cloudflare servers that require SNI. This fixes [#10055](https://github.com/ClickHouse/ClickHouse/issues/10055). [#16252](https://github.com/ClickHouse/ClickHouse/pull/16252) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)).
* Fix possible stack overflow if a loop of materialized views is created. This closes [#15732](https://github.com/ClickHouse/ClickHouse/issues/15732). [#16048](https://github.com/ClickHouse/ClickHouse/pull/16048) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Simplify the implementation of background tasks processing for the MergeTree table engines family. There should be no visible changes for user. [#15983](https://github.com/ClickHouse/ClickHouse/pull/15983) ([alesapin](https://github.com/alesapin)).
* Improvement for MaterializeMySQL (experimental feature). Throw an exception describing the required sync privileges when the MySQL sync user has wrong privileges. [#15977](https://github.com/ClickHouse/ClickHouse/pull/15977) ([TCeason](https://github.com/TCeason)).
* Made `indexOf()` use BloomFilter. [#14977](https://github.com/ClickHouse/ClickHouse/pull/14977) ([achimbab](https://github.com/achimbab)).
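A small illustration of the `reinterpretAs*` item above, reinterpreting the raw bytes of a Float32 as a UInt32 of the same size; the commented result is an assumption based on the IEEE-754 encoding of 1.0.
``` sql
SELECT reinterpretAsUInt32(toFloat32(1)) AS bits;  -- expected: 1065353216 (0x3F800000)
```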
#### Performance Improvement
* Use the Floyd-Rivest algorithm; it is the best for the ClickHouse use case of partial sorting. Benchmarks are in https://github.com/danlark1/miniselect and [here](https://drive.google.com/drive/folders/1DHEaeXgZuX6AJ9eByeZ8iQVQv0ueP8XM). [#16825](https://github.com/ClickHouse/ClickHouse/pull/16825) ([Danila Kutenin](https://github.com/danlark1)).
* Now the `ReplicatedMergeTree` engine family uses a separate thread pool for replicated fetches. The size of the pool is limited by the `background_fetches_pool_size` setting, which can be tuned with a server restart. The default value of the setting is 3, meaning that the maximum number of parallel fetches is 3 (enough to utilize a 10G network). Fixes #520. [#16390](https://github.com/ClickHouse/ClickHouse/pull/16390) ([alesapin](https://github.com/alesapin)).
* Fixed uncontrolled growth of the state of `quantileTDigest`. [#16680](https://github.com/ClickHouse/ClickHouse/pull/16680) ([hrissan](https://github.com/hrissan)).
* Add `VIEW` subquery description to `EXPLAIN`. Limit push down optimisation for `VIEW`. Add local replicas of `Distributed` to query plan. [#14936](https://github.com/ClickHouse/ClickHouse/pull/14936) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix optimize_read_in_order/optimize_aggregation_in_order with max_threads > 0 and expression in ORDER BY. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)).
* Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)).
* Now we can safely prune partitions with exact match. Useful case: Suppose table is partitioned by `intHash64(x) % 100` and the query has condition on `intHash64(x) % 100` verbatim, not on x. [#16253](https://github.com/ClickHouse/ClickHouse/pull/16253) ([Amos Bird](https://github.com/amosbird)).
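A minimal sketch of the exact-match pruning case from the last item; the table and column names are hypothetical.
``` sql
CREATE TABLE hits (x UInt64, s String)
ENGINE = MergeTree
PARTITION BY intHash64(x) % 100
ORDER BY x;

-- The condition repeats the partition expression verbatim, so only the matching partition is read.
SELECT count() FROM hits WHERE intHash64(x) % 100 = 7;
```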
#### Experimental Feature
* Add `EmbeddedRocksDB` table engine (can be used for dictionaries). [#15073](https://github.com/ClickHouse/ClickHouse/pull/15073) ([sundyli](https://github.com/sundy-li)).
#### Build/Testing/Packaging Improvement
* Improvements in test coverage building images. [#17233](https://github.com/ClickHouse/ClickHouse/pull/17233) ([alesapin](https://github.com/alesapin)).
* Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)).
* Fix UBSan report in Poco. This closes [#12719](https://github.com/ClickHouse/ClickHouse/issues/12719). [#16765](https://github.com/ClickHouse/ClickHouse/pull/16765) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Do not instrument 3rd-party libraries with UBSan. [#16764](https://github.com/ClickHouse/ClickHouse/pull/16764) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix UBSan report in cache dictionaries. This closes [#12641](https://github.com/ClickHouse/ClickHouse/issues/12641). [#16763](https://github.com/ClickHouse/ClickHouse/pull/16763) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix UBSan report when trying to convert infinite floating point number to integer. This closes [#14190](https://github.com/ClickHouse/ClickHouse/issues/14190). [#16677](https://github.com/ClickHouse/ClickHouse/pull/16677) ([alexey-milovidov](https://github.com/alexey-milovidov)).
## ClickHouse release 20.11
### ClickHouse release v20.11.3.3-stable, 2020-11-13

View File

@ -6,6 +6,7 @@ set (SRCS
demangle.cpp
getFQDNOrHostName.cpp
getMemoryAmount.cpp
getPageSize.cpp
getThreadId.cpp
JSON.cpp
LineReader.cpp

View File

@ -1,4 +1,5 @@
#include <common/ReadlineLineReader.h>
#include <common/errnoToString.h>
#include <ext/scope_guard.h>
#include <errno.h>
@ -69,7 +70,7 @@ ReadlineLineReader::ReadlineLineReader(
{
int res = read_history(history_file_path.c_str());
if (res)
std::cerr << "Cannot read history from file " + history_file_path + ": "+ strerror(errno) << std::endl;
std::cerr << "Cannot read history from file " + history_file_path + ": "+ errnoToString(errno) << std::endl;
}
/// Added '.' to the default list. Because it is used to separate database and table.
@ -107,7 +108,7 @@ ReadlineLineReader::ReadlineLineReader(
};
if (signal(SIGINT, clear_prompt_or_exit) == SIG_ERR)
throw std::runtime_error(std::string("Cannot set signal handler for readline: ") + strerror(errno));
throw std::runtime_error(std::string("Cannot set signal handler for readline: ") + errnoToString(errno));
rl_variable_bind("completion-ignore-case", "on");
// TODO: it doesn't work

View File

@ -47,7 +47,7 @@ ReplxxLineReader::ReplxxLineReader(
{
if (!rx.history_load(history_file_path))
{
rx.print("Loading history failed: %s\n", strerror(errno));
rx.print("Loading history failed: %s\n", errnoToString(errno).c_str());
}
if (flock(history_file_fd, LOCK_UN))
@ -88,7 +88,7 @@ ReplxxLineReader::ReplxxLineReader(
ReplxxLineReader::~ReplxxLineReader()
{
if (close(history_file_fd))
rx.print("Close of history file failed: %s\n", strerror(errno));
rx.print("Close of history file failed: %s\n", errnoToString(errno).c_str());
}
LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)
@ -113,7 +113,7 @@ void ReplxxLineReader::addToHistory(const String & line)
// and that is why flock() is added here.
bool locked = false;
if (flock(history_file_fd, LOCK_EX))
rx.print("Lock of history file failed: %s\n", strerror(errno));
rx.print("Lock of history file failed: %s\n", errnoToString(errno).c_str());
else
locked = true;
@ -121,10 +121,10 @@ void ReplxxLineReader::addToHistory(const String & line)
// flush changes to the disk
if (!rx.history_save(history_file_path))
rx.print("Saving history failed: %s\n", strerror(errno));
rx.print("Saving history failed: %s\n", errnoToString(errno).c_str());
if (locked && 0 != flock(history_file_fd, LOCK_UN))
rx.print("Unlock of history file failed: %s\n", strerror(errno));
rx.print("Unlock of history file failed: %s\n", errnoToString(errno).c_str());
}
void ReplxxLineReader::enableBracketedPaste()

View File

@ -1,5 +1,6 @@
#include <stdexcept>
#include "common/getMemoryAmount.h"
#include "common/getPageSize.h"
#include <unistd.h>
#include <sys/types.h>
@ -18,7 +19,7 @@ uint64_t getMemoryAmountOrZero()
if (num_pages <= 0)
return 0;
int64_t page_size = sysconf(_SC_PAGESIZE);
int64_t page_size = getPageSize();
if (page_size <= 0)
return 0;

View File

@ -0,0 +1,8 @@
#include "common/getPageSize.h"
#include <unistd.h>
Int64 getPageSize()
{
return sysconf(_SC_PAGESIZE);
}

View File

@ -0,0 +1,6 @@
#pragma once
#include "common/types.h"
/// Get memory page size
Int64 getPageSize();

View File

@ -1,6 +1,7 @@
// https://stackoverflow.com/questions/1413445/reading-a-password-from-stdcin
#include <common/setTerminalEcho.h>
#include <common/errnoToString.h>
#include <stdexcept>
#include <cstring>
#include <string>
@ -31,7 +32,7 @@ void setTerminalEcho(bool enable)
#else
struct termios tty;
if (tcgetattr(STDIN_FILENO, &tty))
throw std::runtime_error(std::string("setTerminalEcho failed get: ") + strerror(errno));
throw std::runtime_error(std::string("setTerminalEcho failed get: ") + errnoToString(errno));
if (!enable)
tty.c_lflag &= ~ECHO;
else
@ -39,6 +40,6 @@ void setTerminalEcho(bool enable)
auto ret = tcsetattr(STDIN_FILENO, TCSANOW, &tty);
if (ret)
throw std::runtime_error(std::string("setTerminalEcho failed set: ") + strerror(errno));
throw std::runtime_error(std::string("setTerminalEcho failed set: ") + errnoToString(errno));
#endif
}

View File

@ -47,6 +47,7 @@ SRCS(
errnoToString.cpp
getFQDNOrHostName.cpp
getMemoryAmount.cpp
getPageSize.cpp
getResource.cpp
getThreadId.cpp
mremap.cpp

View File

@ -41,9 +41,10 @@ if (SANITIZE)
if (COMPILER_CLANG)
set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt")
else()
message (WARNING "TSAN suppressions was not passed to the compiler (since the compiler is not clang)")
message (WARNING "Use the following command to pass them manually:")
message (WARNING " export TSAN_OPTIONS=\"$TSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt\"")
set (MESSAGE "TSAN suppressions was not passed to the compiler (since the compiler is not clang)\n")
set (MESSAGE "${MESSAGE}Use the following command to pass them manually:\n")
set (MESSAGE "${MESSAGE} export TSAN_OPTIONS=\"$TSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt\"")
message (WARNING "${MESSAGE}")
endif()
@ -57,8 +58,18 @@ if (SANITIZE)
endif ()
elseif (SANITIZE STREQUAL "undefined")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
set (UBSAN_FLAGS "-fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero")
if (COMPILER_CLANG)
set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
else()
set (MESSAGE "UBSAN suppressions was not passed to the compiler (since the compiler is not clang)\n")
set (MESSAGE "${MESSAGE}Use the following command to pass them manually:\n")
set (MESSAGE "${MESSAGE} export UBSAN_OPTIONS=\"$UBSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt\"")
message (WARNING "${MESSAGE}")
endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
endif()

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit a7ceabe4747ecc3309dd3dcd9de4b29660dfd298
Subproject commit 0b98b443aa7bb77d65efd7b23b3b8c8a0ab5f1f3

2
contrib/krb5 vendored

@ -1 +1 @@
Subproject commit 99f7ad2831a01f264c07eed42a0a3a9336b86184
Subproject commit 90ff6f4f8c695d6bf1aaba78a9b8942be92141c2

@ -1 +1 @@
Subproject commit 92c74f938cf2c4dd529cae4f3d2923d153b029a7
Subproject commit a720b7105a610acbd7427eea475a5b6810c151eb

2
contrib/libgsasl vendored

@ -1 +1 @@
Subproject commit 140fb58250588c8323285b75fcf127c4adc33dfa
Subproject commit 383ee28e82f69fa16ed43b48bd9c8ee5b313ab84

2
contrib/libhdfs3 vendored

@ -1 +1 @@
Subproject commit 30552ac527f2c14070d834e171493b2e7f662375
Subproject commit 095b9d48b400abb72d967cb0539af13b1e3d90cf

View File

@ -17,7 +17,12 @@ if (NOT USE_INTERNAL_PROTOBUF_LIBRARY AND PROTOBUF_OLD_ABI_COMPAT)
endif ()
endif()
set(WITH_KERBEROS false)
if (${ENABLE_LIBRARIES} AND ${ENABLE_KRB5})
SET(WITH_KERBEROS 1)
else()
SET(WITH_KERBEROS 0)
endif()
# project and source dir
set(HDFS3_ROOT_DIR ${ClickHouse_SOURCE_DIR}/contrib/libhdfs3)
set(HDFS3_SOURCE_DIR ${HDFS3_ROOT_DIR}/src)
@ -28,11 +33,6 @@ set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH})
include(Platform)
include(Options)
# prefer shared libraries
if (WITH_KERBEROS)
find_package(KERBEROS REQUIRED)
endif()
# source
set(PROTO_FILES
#${HDFS3_SOURCE_DIR}/proto/encryption.proto
@ -207,14 +207,11 @@ target_include_directories(hdfs3 PRIVATE ${HDFS3_COMMON_DIR})
target_include_directories(hdfs3 PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_include_directories(hdfs3 PRIVATE ${LIBGSASL_INCLUDE_DIR})
if (WITH_KERBEROS)
target_include_directories(hdfs3 PRIVATE ${KERBEROS_INCLUDE_DIRS})
endif()
target_include_directories(hdfs3 PRIVATE ${LIBXML2_INCLUDE_DIR})
target_link_libraries(hdfs3 PRIVATE ${LIBGSASL_LIBRARY})
if (WITH_KERBEROS)
target_link_libraries(hdfs3 PRIVATE ${KERBEROS_LIBRARIES})
target_link_libraries(hdfs3 PRIVATE ${KRB5_LIBRARY})
endif()
target_link_libraries(hdfs3 PRIVATE ${LIBXML2_LIBRARIES})

2
contrib/librdkafka vendored

@ -1 +1 @@
Subproject commit 9902bc4fb18bb441fa55ca154b341cdda191e5d3
Subproject commit f2f6616419d567c9198aef0d1133a2e9b4f02276

2
contrib/libunwind vendored

@ -1 +1 @@
Subproject commit 51b84d9b6d2548f1cbdcafe622d5a753853b6149
Subproject commit 8fe25d7dc70f2a4ea38c3e5a33fa9d4199b67a5a

2
debian/control vendored
View File

@ -40,7 +40,7 @@ Description: Common files for ClickHouse
Package: clickhouse-server
Architecture: all
Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}), adduser
Recommends: libcap2-bin
Recommends: libcap2-bin, krb5-user
Replaces: clickhouse-server-common, clickhouse-server-base
Provides: clickhouse-server-common
Description: Server binary for ClickHouse

View File

@ -58,8 +58,7 @@
"docker/test/stateless": {
"name": "yandex/clickhouse-stateless-test",
"dependent": [
"docker/test/stateful",
"docker/test/stateful_with_coverage"
"docker/test/stateful"
]
},
"docker/test/stateless_pytest": {
@ -68,7 +67,9 @@
},
"docker/test/stateless_with_coverage": {
"name": "yandex/clickhouse-stateless-test-with-coverage",
"dependent": []
"dependent": [
"docker/test/stateful_with_coverage"
]
},
"docker/test/unit": {
"name": "yandex/clickhouse-unit-test",
@ -157,5 +158,9 @@
"name": "yandex/clickhouse-stateless-unbundled-test",
"dependent": [
]
},
"docker/test/integration/kerberized_hadoop": {
"name": "yandex/clickhouse-kerberized-hadoop",
"dependent": []
}
}

View File

@ -31,6 +31,7 @@ find . -name '*.so.*' -print -exec mv '{}' /output \;
if [ "performance" == "$COMBINED_OUTPUT" ]
then
cp -r ../tests/performance /output
cp -r ../tests/config/top_level_domains /
cp -r ../docker/test/performance-comparison/config /output ||:
rm /output/unit_tests_dbms ||:
rm /output/clickhouse-odbc-bridge ||:

View File

@ -148,6 +148,10 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
if split_binary:
cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1')
# We can't always build utils because it requires too much space, but
# we have to build them at least in some way in CI. The split build is
# probably the least heavy disk-wise.
cmake_flags.append('-DENABLE_UTILS=1')
if clang_tidy:
cmake_flags.append('-DENABLE_CLANG_TIDY=1')

View File

@ -15,6 +15,8 @@ For more information and documentation see https://clickhouse.yandex/.
$ docker run -d --name some-clickhouse-server --ulimit nofile=262144:262144 yandex/clickhouse-server
```
By default ClickHouse will be accessible only via docker network. See the [networking section below](#networking).
### connect to it from a native client
```bash
$ docker run -it --rm --link some-clickhouse-server:clickhouse-server yandex/clickhouse-client --host clickhouse-server
@ -22,6 +24,70 @@ $ docker run -it --rm --link some-clickhouse-server:clickhouse-server yandex/cli
More information about [ClickHouse client](https://clickhouse.yandex/docs/en/interfaces/cli/).
### connect to it using curl
```bash
echo "SELECT 'Hello, ClickHouse!'" | docker run -i --rm --link some-clickhouse-server:clickhouse-server curlimages/curl 'http://clickhouse-server:8123/?query=' -s --data-binary @-
```
More information about [ClickHouse HTTP Interface](https://clickhouse.tech/docs/en/interfaces/http/).
### stopping / removing the container
```bash
$ docker stop some-clickhouse-server
$ docker rm some-clickhouse-server
```
### networking
You can expose your ClickHouse running in docker by [mapping a particular port](https://docs.docker.com/config/containers/container-networking/) from inside the container to host ports:
```bash
$ docker run -d -p 18123:8123 -p19000:9000 --name some-clickhouse-server --ulimit nofile=262144:262144 yandex/clickhouse-server
$ echo 'SELECT version()' | curl 'http://localhost:18123/' --data-binary @-
20.12.3.3
```
or by allowing the container to use [host ports directly](https://docs.docker.com/network/host/) via `--network=host` (which also allows achieving better network performance):
```bash
$ docker run -d --network=host --name some-clickhouse-server --ulimit nofile=262144:262144 yandex/clickhouse-server
$ echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @-
20.12.3.3
```
### Volumes
Typically you may want to mount the following folders inside your container to achieve persistence:
* `/var/lib/clickhouse/` - main folder where ClickHouse stores the data
* `/var/log/clickhouse-server/` - logs
```bash
$ docker run -d \
-v $(realpath ./ch_data):/var/lib/clickhouse/ \
-v $(realpath ./ch_logs):/var/log/clickhouse-server/ \
--name some-clickhouse-server --ulimit nofile=262144:262144 yandex/clickhouse-server
```
You may also want to mount:
* `/etc/clickhouse-server/config.d/*.xml` - files with server configuration adjustments
* `/etc/clickhouse-server/users.d/*.xml` - files with user settings adjustments
* `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below).
### Linux capabilities
ClickHouse has some advanced functionality which requires enabling several [linux capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html).
It is optional and can be enabled using the following [docker command line arguments](https://docs.docker.com/engine/reference/run/#runtime-privilege-and-linux-capabilities):
```bash
$ docker run -d \
--cap-add=SYS_NICE --cap-add=NET_ADMIN --cap-add=IPC_LOCK \
--name some-clickhouse-server --ulimit nofile=262144:262144 yandex/clickhouse-server
```
## Configuration
The container exposes port 8123 for the [HTTP interface](https://clickhouse.yandex/docs/en/interfaces/http_interface/) and port 9000 for the [native client](https://clickhouse.yandex/docs/en/interfaces/tcp/).

View File

@ -1,5 +1,5 @@
# docker build -t yandex/clickhouse-test-base .
FROM ubuntu:19.10
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11

View File

@ -0,0 +1,18 @@
# docker build -t yandex/clickhouse-kerberized-hadoop .
FROM sequenceiq/hadoop-docker:2.7.0
RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo
RUN sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo
RUN sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo
RUN yum clean all && \
rpm --rebuilddb && \
yum -y update && \
yum -y install yum-plugin-ovl && \
yum --quiet -y install krb5-workstation.x86_64
RUN cd /tmp && \
curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \
tar xzf commons-daemon-1.0.15-src.tar.gz && \
cd commons-daemon-1.0.15-src/src/native/unix && \
./configure && \
make && \
cp ./jsvc /usr/local/hadoop/sbin

View File

@ -29,6 +29,8 @@ RUN apt-get update \
libcurl4-openssl-dev \
gdb \
software-properties-common \
libkrb5-dev \
krb5-user \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
@ -75,7 +77,8 @@ RUN python3 -m pip install \
pytest-timeout \
redis \
tzlocal \
urllib3
urllib3 \
requests-kerberos
COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/

View File

@ -2,6 +2,7 @@ version: '2.3'
services:
hdfs1:
image: sequenceiq/hadoop-docker:2.7.0
hostname: hdfs1
restart: always
ports:
- 50075:50075

View File

@ -0,0 +1,29 @@
version: '2.3'
services:
kerberizedhdfs1:
cap_add:
- DAC_READ_SEARCH
image: yandex/clickhouse-kerberized-hadoop:16621
hostname: kerberizedhdfs1
restart: always
volumes:
- ${KERBERIZED_HDFS_DIR}/../../hdfs_configs/bootstrap.sh:/etc/bootstrap.sh:ro
- ${KERBERIZED_HDFS_DIR}/secrets:/usr/local/hadoop/etc/hadoop/conf
- ${KERBERIZED_HDFS_DIR}/secrets/krb_long.conf:/etc/krb5.conf:ro
ports:
- 1006:1006
- 50070:50070
- 9000:9000
depends_on:
- hdfskerberos
entrypoint: /etc/bootstrap.sh -d
hdfskerberos:
image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG}
hostname: hdfskerberos
volumes:
- ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab
- ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh
- /dev/urandom:/dev/random
ports: [88, 749]

View File

@ -3,6 +3,7 @@
<mysql_port remove="remove"/>
<interserver_http_port remove="remove"/>
<tcp_with_proxy_port remove="remove"/>
<test_keeper_server remove="remove"/>
<listen_host>::</listen_host>
<logger>

View File

@ -1,8 +1,6 @@
# docker build -t yandex/clickhouse-stateful-test-with-coverage .
FROM yandex/clickhouse-stateless-test-with-coverage
RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main" >> /etc/apt/sources.list
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \

View File

@ -108,6 +108,95 @@ Create table with files named `file000`, `file001`, … , `file999`:
``` sql
CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV')
```
## Configuration {#configuration}
Similar to GraphiteMergeTree, the HDFS engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (`hdfs`) and user-level (`hdfs_*`). The global configuration is applied first, and then the user-level configuration is applied (if it exists).
``` xml
<!-- Global configuration options for HDFS engine type -->
<hdfs>
<hadoop_kerberos_keytab>/tmp/keytab/clickhouse.keytab</hadoop_kerberos_keytab>
<hadoop_kerberos_principal>clickuser@TEST.CLICKHOUSE.TECH</hadoop_kerberos_principal>
<hadoop_security_authentication>kerberos</hadoop_security_authentication>
</hdfs>
<!-- Configuration specific for user "root" -->
<hdfs_root>
<hadoop_kerberos_principal>root@TEST.CLICKHOUSE.TECH</hadoop_kerberos_principal>
</hdfs_root>
```
### List of possible configuration options with default values
#### Supported by libhdfs3
| **parameter** | **default value** |
| --- | --- |
| rpc\_client\_connect\_tcpnodelay | true |
| dfs\_client\_read\_shortcircuit | true |
| output\_replace-datanode-on-failure | true |
| input\_notretry-another-node | false |
| input\_localread\_mappedfile | true |
| dfs\_client\_use\_legacy\_blockreader\_local | false |
| rpc\_client\_ping\_interval | 10 * 1000 |
| rpc\_client\_connect\_timeout | 600 * 1000 |
| rpc\_client\_read\_timeout | 3600 * 1000 |
| rpc\_client\_write\_timeout | 3600 * 1000 |
| rpc\_client\_socekt\_linger\_timeout | -1 |
| rpc\_client\_connect\_retry | 10 |
| rpc\_client\_timeout | 3600 * 1000 |
| dfs\_default\_replica | 3 |
| input\_connect\_timeout | 600 * 1000 |
| input\_read\_timeout | 3600 * 1000 |
| input\_write\_timeout | 3600 * 1000 |
| input\_localread\_default\_buffersize | 1 * 1024 * 1024 |
| dfs\_prefetchsize | 10 |
| input\_read\_getblockinfo\_retry | 3 |
| input\_localread\_blockinfo\_cachesize | 1000 |
| input\_read\_max\_retry | 60 |
| output\_default\_chunksize | 512 |
| output\_default\_packetsize | 64 * 1024 |
| output\_default\_write\_retry | 10 |
| output\_connect\_timeout | 600 * 1000 |
| output\_read\_timeout | 3600 * 1000 |
| output\_write\_timeout | 3600 * 1000 |
| output\_close\_timeout | 3600 * 1000 |
| output\_packetpool\_size | 1024 |
| output\_heeartbeat\_interval | 10 * 1000 |
| dfs\_client\_failover\_max\_attempts | 15 |
| dfs\_client\_read\_shortcircuit\_streams\_cache\_size | 256 |
| dfs\_client\_socketcache\_expiryMsec | 3000 |
| dfs\_client\_socketcache\_capacity | 16 |
| dfs\_default\_blocksize | 64 * 1024 * 1024 |
| dfs\_default\_uri | "hdfs://localhost:9000" |
| hadoop\_security\_authentication | "simple" |
| hadoop\_security\_kerberos\_ticket\_cache\_path | "" |
| dfs\_client\_log\_severity | "INFO" |
| dfs\_domain\_socket\_path | "" |
[HDFS Configuration Reference](https://hawq.apache.org/docs/userguide/2.3.0.0-incubating/reference/HDFSConfigurationParameterReference.html) might explain some parameters.
#### ClickHouse extras {#clickhouse-extras}
| **parameter** | **default value** |
| --- | --- |
|hadoop\_kerberos\_keytab | "" |
|hadoop\_kerberos\_principal | "" |
|hadoop\_kerberos\_kinit\_command | kinit |
#### Limitations {#limitations}
* hadoop\_security\_kerberos\_ticket\_cache\_path can be global only, not user specific
## Kerberos support {#kerberos-support}
If the hadoop\_security\_authentication parameter has the value 'kerberos', ClickHouse authenticates via the Kerberos facility.
Parameters [here](#clickhouse-extras) and hadoop\_security\_kerberos\_ticket\_cache\_path may be of help.
Note that due to libhdfs3 limitations only the old-fashioned approach is supported;
datanode communications are not secured by SASL (HADOOP\_SECURE\_DN\_USER is a reliable indicator of such a
security approach). Use tests/integration/test\_storage\_kerberized\_hdfs/hdfs_configs/bootstrap.sh for reference.
If hadoop\_kerberos\_keytab, hadoop\_kerberos\_principal or hadoop\_kerberos\_kinit\_command is specified, kinit will be invoked. hadoop\_kerberos\_keytab and hadoop\_kerberos\_principal are mandatory in this case. The kinit tool and krb5 configuration files are required.
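A minimal usage sketch, assuming the Kerberos-related settings from the configuration example above are in place; the namenode address and file path are hypothetical.
``` sql
CREATE TABLE kerberized_hdfs_table (id UInt32, name String, value Float64)
ENGINE = HDFS('hdfs://kerberizedhdfs1:9000/user/clickuser/data.tsv', 'TSV');

SELECT * FROM kerberized_hdfs_table LIMIT 10;
```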
## Virtual Columns {#virtual-columns}

View File

@ -57,7 +57,7 @@ The command line is based on replxx (similar to readline). In other
By default, the format used is PrettyCompact. You can change the format in the FORMAT clause of the query, or by specifying `\G` at the end of the query, using the `--format` or `--vertical` argument in the command line, or using the client configuration file.
To exit the client, press Ctrl+D (or Ctrl+C), or enter one of the following instead of a query: “exit”, “quit”, “logout”, “exit;”, “quit;”, “logout;”, “q”, “Q”, “:q”
To exit the client, press Ctrl+D, or enter one of the following instead of a query: “exit”, “quit”, “logout”, “exit;”, “quit;”, “logout;”, “q”, “Q”, “:q”
When processing a query, the client shows:

View File

@ -25,6 +25,6 @@ Under the same conditions, ClickHouse can handle several hundred queries per sec
## Performance When Inserting Data {#performance-when-inserting-data}
We recommend inserting data in packets of at least 1000 rows, or no more than a single request per second. When inserting to a MergeTree table from a tab-separated dump, the insertion speed can be from 50 to 200 MB/s. If the inserted rows are around 1 Kb in size, the speed will be from 50,000 to 200,000 rows per second. If the rows are small, the performance can be higher in rows per second (on Banner System data -`>` 500,000 rows per second; on Graphite data -`>` 1,000,000 rows per second). To improve performance, you can make multiple INSERT queries in parallel, which scales linearly.
We recommend inserting data in packets of at least 1000 rows, or no more than a single request per second. When inserting to a MergeTree table from a tab-separated dump, the insertion speed can be from 50 to 200 MB/s. If the inserted rows are around 1 KB in size, the speed will be from 50,000 to 200,000 rows per second. If the rows are small, the performance can be higher in rows per second (on Banner System data -`>` 500,000 rows per second; on Graphite data -`>` 1,000,000 rows per second). To improve performance, you can make multiple INSERT queries in parallel, which scales linearly.
{## [Original article](https://clickhouse.tech/docs/en/introduction/performance/) ##}

View File

@ -39,7 +39,7 @@ data_compressed_bytes: 499
last_exception:
```
**See also**
**See Also**
- [Distributed table engine](../../engines/table-engines/special/distributed.md)

View File

@ -0,0 +1,81 @@
# system.replication_queue {#system_tables-replication_queue}
Contains information about tasks from replication queues stored in ZooKeeper for tables in the `ReplicatedMergeTree` family.
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
- `replica_name` ([String](../../sql-reference/data-types/string.md)) — Replica name in ZooKeeper. Different replicas of the same table have different names.
- `position` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Position of the task in the queue.
- `node_name` ([String](../../sql-reference/data-types/string.md)) — Node name in ZooKeeper.
- `type` ([String](../../sql-reference/data-types/string.md)) — Type of the task in the queue: `GET_PARTS`, `MERGE_PARTS`, `DETACH_PARTS`, `DROP_PARTS`, or `MUTATE_PARTS`.
- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was submitted for execution.
- `required_quorum` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of replicas waiting for the task to complete with confirmation of completion. This column is only relevant for the `GET_PARTS` task.
- `source_replica` ([String](../../sql-reference/data-types/string.md)) — Name of the source replica.
- `new_part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the new part.
- `parts_to_merge` ([Array](../../sql-reference/data-types/array.md) ([String](../../sql-reference/data-types/string.md))) — Names of parts to merge or update.
- `is_detach` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the `DETACH_PARTS` task is in the queue.
- `is_currently_executing` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether a specific task is being performed right now.
- `num_tries` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of failed attempts to complete the task.
- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text message about the last error that occurred (if any).
- `last_attempt_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last attempted.
- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of postponed tasks.
- `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — The reason why the task was postponed.
- `last_postpone_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last postponed.
- `merge_type` ([String](../../sql-reference/data-types/string.md)) — Type of the current merge. Empty if it's a mutation.
**Example**
``` sql
SELECT * FROM system.replication_queue LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
database: merge
table: visits_v2
replica_name: mtgiga001-1t.metrika.yandex.net
position: 15
node_name: queue-0009325559
type: MERGE_PARTS
create_time: 2020-12-07 14:04:21
required_quorum: 0
source_replica: mtgiga001-1t.metrika.yandex.net
new_part_name: 20201130_121373_121384_2
parts_to_merge: ['20201130_121373_121378_1','20201130_121379_121379_0','20201130_121380_121380_0','20201130_121381_121381_0','20201130_121382_121382_0','20201130_121383_121383_0','20201130_121384_121384_0']
is_detach: 0
is_currently_executing: 0
num_tries: 36
last_exception: Code: 226, e.displayText() = DB::Exception: Marks file '/opt/clickhouse/data/merge/visits_v2/tmp_fetch_20201130_121373_121384_2/CounterID.mrk' doesn't exist (version 20.8.7.15 (official build))
last_attempt_time: 2020-12-08 17:35:54
num_postponed: 0
postpone_reason:
last_postpone_time: 1970-01-01 03:00:00
```
**See Also**
- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md/#query-language-system-replicated)
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replication_queue) <!--hide-->

View File

@ -91,6 +91,23 @@ The Linux kernel prior to 3.2 had a multitude of problems with IPv6 implementati
Use at least a 10 GB network, if possible. 1 Gb will also work, but it will be much worse for patching replicas with tens of terabytes of data, or for processing distributed queries with a large amount of intermediate data.
## Hypervisor configuration
If you are using OpenStack, set
```
cpu_mode=host-passthrough
```
in nova.conf.
If you are using libvirt, set
```
<cpu mode='host-passthrough'/>
```
in XML configuration.
This is important for ClickHouse to be able to get correct information with `cpuid` instruction.
Otherwise you may get `Illegal instruction` crashes when hypervisor is run on old CPU models.
## ZooKeeper {#zookeeper}
You are probably already using ZooKeeper for other purposes. You can use the same installation of ZooKeeper, if it isn't already overloaded.

View File

@ -25,6 +25,10 @@ Example 2: `uniqArray(arr)` Counts the number of unique elements in all a
-If and -Array can be combined. However, Array must come first, then If. Examples: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. Due to this order, the `cond` argument won't be an array.
## -SimpleState {#agg-functions-combinator-simplestate}
If you apply this combinator, the aggregate function returns the same value but with a different type. This is a `SimpleAggregateFunction(...)` that can be stored in a table to work with [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engines.
## -State {#agg-functions-combinator-state}
If you apply this combinator, the aggregate function doesn't return the resulting value (such as the number of unique values for the [uniq](../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function), but an intermediate state of the aggregation (for `uniq`, this is the hash table for calculating the number of unique values). This is an `AggregateFunction(...)` that can be used for further processing or stored in a table to finish aggregating later.
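A minimal sketch of `-State` in use; the `-Merge` combinator (not shown in this fragment) is assumed to finish the aggregation, and the table and column names are hypothetical.
``` sql
SELECT uniqMerge(state) AS total_unique_users
FROM
(
    SELECT Day, uniqState(UserID) AS state
    FROM visits
    GROUP BY Day
);
```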

View File

@ -1288,12 +1288,30 @@ Returns the index of the first element in the `arr1` array for which `func` retu
Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted.
## arrayMin(\[func,\] arr1, …) {#array-min}
Returns the minimum of the `func` values. If the function is omitted, it just returns the min of the array elements.
Note that the `arrayMin` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
## arrayMax(\[func,\] arr1, …) {#array-max}
Returns the maximum of the `func` values. If the function is omitted, it just returns the max of the array elements.
Note that the `arrayMax` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
## arraySum(\[func,\] arr1, …) {#array-sum}
Returns the sum of the `func` values. If the function is omitted, it just returns the sum of the array elements.
Note that the `arraySum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
## arrayAvg(\[func,\] arr1, …) {#array-avg}
Returns the average of the `func` values. If the function is omitted, it just returns the average of the array elements.
Note that the `arrayAvg` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
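Minimal examples of these helpers, with and without a lambda; the commented results follow from the definitions above.
``` sql
SELECT
    arrayMin([1, 2, 4])             AS min_plain,   -- 1
    arrayMax(x -> (-x), [1, 2, 4])  AS max_of_func, -- -1
    arraySum([1, 2, 4])             AS sum_plain,   -- 7
    arrayAvg(x -> x * 2, [1, 2, 4]) AS avg_of_func; -- 14 / 3
```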
## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1}
Returns an array of partial sums of elements in the source array (a running sum). If the `func` function is specified, then the values of the array elements are converted by this function before summing.

View File

@ -430,6 +430,63 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
- [cast_keep_nullable](../../operations/settings/settings.md#cast_keep_nullable) setting
## accurateCast(x, T) {#type_conversion_function-accurate-cast}
Converts `x` to the `T` data type. The difference from `cast(x, T)` is that `accurateCast`
does not allow overflow of numeric types during cast if the value of `x` does not fit within
the bounds of type `T`.
Example:
``` sql
SELECT cast(-1, 'UInt8') as uint8;
```
``` text
┌─uint8─┐
│ 255 │
└───────┘
```
``` sql
SELECT accurateCast(-1, 'UInt8') as uint8;
```
``` text
Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in column Int8 cannot be safely converted into type UInt8: While processing accurateCast(-1, 'UInt8') AS uint8.
```
## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null}
Converts `x` to the `T` data type. Always returns a nullable type and returns NULL
if the cast value is not representable in the target type.
Example:
``` sql
SELECT
accurateCastOrNull(-1, 'UInt8') as uint8,
accurateCastOrNull(128, 'Int8') as int8,
accurateCastOrNull('Test', 'FixedString(2)') as fixed_string
```
``` text
┌─uint8─┬─int8─┬─fixed_string─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└───────┴──────┴──────────────┘
```
``` sql
SELECT toTypeName(accurateCastOrNull(5, 'UInt8'))
```
``` text
┌─toTypeName(accurateCastOrNull(5, 'UInt8'))─┐
│ Nullable(UInt8) │
└────────────────────────────────────────────┘
```
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval}
Converts a Number type argument to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type.
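A minimal sketch (the literal date is assumed for the example):

``` sql
WITH toDate('2019-01-01') AS date
SELECT date + toIntervalWeek(1) AS next_week;
-- next_week = 2019-01-08
```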

View File

@ -204,7 +204,7 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
## Managing ReplicatedMergeTree Tables {#query-language-system-replicated}
ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication/#table_engines-replication) tables.
ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) tables.
### STOP FETCHES {#query_language-system-stop-fetches}

View File

@ -59,7 +59,7 @@ La línea de comandos se basa en replxx (similar a readline). En otr
De forma predeterminada, el formato utilizado es PrettyCompact. Puede cambiar el formato en la cláusula FORMAT de la consulta o especificando `\G` al final de la consulta, utilizando el `--format` o `--vertical` en la línea de comandos, o utilizando el archivo de configuración del cliente.
Para salir del cliente, presione Ctrl+D (o Ctrl+C) o introduzca una de las siguientes opciones en lugar de una consulta: “exit”, “quit”, “logout”, “exit;”, “quit;”, “logout;”, “q”, “Q”, “:q”
Para salir del cliente, presione Ctrl+D o introduzca una de las siguientes opciones en lugar de una consulta: “exit”, “quit”, “logout”, “exit;”, “quit;”, “logout;”, “q”, “Q”, “:q”
Al procesar una consulta, el cliente muestra:

View File

@ -59,7 +59,7 @@ $ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FOR
既定では、使用される形式はPrettyCompactです。 クエリのFORMAT句で書式を変更するか、次のように指定することができます `\G` クエリの最後に、 `--format` または `--vertical` コマンドラインでの引数、またはクライアント構成ファイルの使用。
クライアントを終了するには、Ctrl+D(またはCtrl+C)を押すか、クエリの代わりに次のいずれかを入力します: “exit”, “quit”, “logout”, “exit;”, “quit;”, “logout;”, “q”, “Q”, “:q”
クライアントを終了するには、Ctrl+Dを押すか、クエリの代わりに次のいずれかを入力します: “exit”, “quit”, “logout”, “exit;”, “quit;”, “logout;”, “q”, “Q”, “:q”
が処理クエリー、クライアントを示し:

View File

@ -0,0 +1,25 @@
---
title: General questions about ClickHouse
toc_hidden_folder: true
toc_priority: 1
toc_title: Общие вопросы
---
# Общие вопросы о ClickHouse {#obshchie-voprosy}
Вопросы:
- Что такое ClickHouse?
- Почему ClickHouse такой быстрый?
- Кто пользуется ClickHouse?
- Что обозначает название “ClickHouse”?
- Что значит “Не тормозит”?
- Что такое OLAP?
- Что такое колоночная база данных?
- [Почему бы не использовать системы типа MapReduce?](mapreduce.md)
!!! info "Если вы не нашли то, что искали:"
Загляните в другие категории F.A.Q. или поищите в других разделах документации, ориентируйтесь по оглавлению слева.
{## [Original article](https://clickhouse.tech/docs/ru/faq/general/) ##}

View File

@ -1,8 +1,12 @@
# Общие вопросы {#obshchie-voprosy}
---
title: Why not use something like MapReduce?
toc_hidden: true
toc_priority: 110
---
## Почему бы не использовать системы типа MapReduce? {#pochemu-by-ne-ispolzovat-sistemy-tipa-mapreduce}
Системами типа MapReduce будем называть системы распределённых вычислений, в которых операция reduce сделана на основе распределённой сортировки. Наиболее распространённым opensource решением данного класса является [Apache Hadoop](http://hadoop.apache.org). Яндекс использует собственное решение — YT.
Системами типа MapReduce будем называть системы распределённых вычислений, в которых операция reduce сделана на основе распределённой сортировки. Наиболее распространённым opensource решением данного класса является [Apache Hadoop](http://hadoop.apache.org). Яндекс использует собственное решение — YT.
Такие системы не подходят для онлайн запросов в силу слишком большой latency. То есть, не могут быть использованы в качестве бэкенда для веб-интерфейса.
Такие системы не подходят для обновления данных в реальном времени.
@ -10,47 +14,3 @@
Распределённая сортировка является основной причиной тормозов при выполнении несложных map-reduce задач.
Большинство реализаций MapReduce позволяют выполнять произвольный код на кластере. Но для OLAP задач лучше подходит декларативный язык запросов, который позволяет быстро проводить исследования. Для примера, для Hadoop существует Hive и Pig. Также смотрите Cloudera Impala, Shark (устаревший) для Spark, а также Spark SQL, Presto, Apache Drill. Впрочем, производительность при выполнении таких задач является сильно неоптимальной по сравнению со специализированными системами, а сравнительно высокая latency не позволяет использовать эти системы в качестве бэкенда для веб-интерфейса.
## Что делать, если у меня проблема с кодировками при использовании Oracle через ODBC? {#oracle-odbc-encodings}
Если вы используете Oracle через драйвер ODBC в качестве источника внешних словарей, необходимо задать правильное значение для переменной окружения `NLS_LANG` в `/etc/default/clickhouse`. Подробнее читайте в [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
**Пример**
``` sql
NLS_LANG=RUSSIAN_RUSSIA.UTF8
```
## Как экспортировать данные из ClickHouse в файл? {#how-to-export-to-file}
### Секция INTO OUTFILE {#sektsiia-into-outfile}
Добавьте секцию [INTO OUTFILE](../sql-reference/statements/select/into-outfile.md#into-outfile-clause) к своему запросу.
Например:
``` sql
SELECT * FROM table INTO OUTFILE 'file'
```
По умолчанию, для выдачи данных ClickHouse использует формат [TabSeparated](../interfaces/formats.md#tabseparated). Чтобы выбрать [формат данных](../interfaces/formats.md), используйте [секцию FORMAT](../sql-reference/statements/select/format.md#format-clause).
Например:
``` sql
SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV
```
### Таблица с движком File {#tablitsa-s-dvizhkom-file}
Смотрите [File](../engines/table-engines/special/file.md).
### Перенаправление в командой строке {#perenapravlenie-v-komandoi-stroke}
``` sql
$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt
```
Смотрите [clickhouse-client](../interfaces/cli.md).
[Оригинальная статья](https://clickhouse.tech/docs/en/faq/general/) <!--hide-->

View File

@ -4,3 +4,14 @@ toc_hidden: true
toc_priority: 76
---
# Содержание F.A.Q. {#soderzhanie}
В этом разделе документации собраны вопросы о ClickHouse, которые задают чаще всего.
Категории:
- **[Общие вопросы](../faq/general/index.md)**
- **[Применение](../faq/use-cases/index.md)**
- **[Операции](../faq/operations/index.md)**
- **[Интеграция](../faq/integration/index.md)**

View File

@ -0,0 +1,37 @@
---
title: How do I export data from ClickHouse to a file?
toc_hidden: true
toc_priority: 10
---
## Как экспортировать данные из ClickHouse в файл? {#how-to-export-to-file-rus}
### Секция INTO OUTFILE {#sektsiia-into-outfile-rus}
Добавьте секцию [INTO OUTFILE](../../sql-reference/statements/select/into-outfile.md#into-outfile-clause) к своему запросу.
Например:
``` sql
SELECT * FROM table INTO OUTFILE 'file'
```
По умолчанию, для выдачи данных ClickHouse использует формат [TabSeparated](../../interfaces/formats.md#tabseparated). Чтобы выбрать [формат данных](../../interfaces/formats.md), используйте секцию [FORMAT](../../sql-reference/statements/select/format.md#format-clause).
Например:
``` sql
SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV
```
### Таблица с движком File {#using-a-file-engine-table}
Смотрите [File](../../engines/table-engines/special/file.md).
### Перенаправление в командной строке {#using-command-line-redirection}
``` bash
$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt
```
Смотрите [clickhouse-client](../../interfaces/cli.md).

View File

@ -0,0 +1,19 @@
---
title: Questions about integrating ClickHouse and other systems
toc_hidden_folder: true
toc_priority: 4
toc_title: Интеграция
---
# Вопросы об интеграции ClickHouse с другими системами {#question-about-integrating-clickhouse-and-other-systems-rus}
Вопросы:
- [Как экспортировать данные из ClickHouse в файл?](file-export.md)
- Как импортировать JSON в ClickHouse?
- [Что делать, если у меня проблема с кодировками при использовании Oracle через ODBC?](oracle-odbc.md)
!!! info "Если вы не нашли то, что искали"
Загляните в другие подразделы F.A.Q. или поищите в остальных разделах документации, ориентируйтесь по оглавлению слева.
{## [Original article](https://clickhouse.tech/docs/ru/faq/integration/) ##}

View File

@ -0,0 +1,15 @@
---
title: What if I have a problem with encodings when using Oracle via ODBC?
toc_hidden: true
toc_priority: 20
---
## Что делать, если у меня проблема с кодировками при использовании Oracle через ODBC? {#oracle-odbc-encodings-rus}
Если вы используете Oracle через драйвер ODBC в качестве источника внешних словарей, необходимо задать правильное значение для переменной окружения `NLS_LANG` в `/etc/default/clickhouse`. Подробнее читайте в [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
**Пример**
``` sql
NLS_LANG=RUSSIAN_RUSSIA.UTF8
```

View File

@ -0,0 +1,18 @@
---
title: Questions about operating ClickHouse servers and clusters
toc_hidden_folder: true
toc_priority: 3
toc_title: Операции
---
# Вопросы об эксплуатации серверов и кластеров ClickHouse {#voprosy-ob-operating-clickhouse-servers-and-clusters}
Вопросы:
- Which ClickHouse version to use in production?
- Is it possible to delete old records from a ClickHouse table?
!!! info "Dont see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
{## [Original article](https://clickhouse.tech/docs/en/faq/production/) ##}

View File

@ -0,0 +1,14 @@
---
title: Questions about ClickHouse use cases
toc_hidden_folder: true
toc_priority: 2
toc_title: Применение
---
# Вопросы о применении ClickHouse {#voprosy-o-primenenii}
Вопросы:
- Can I use ClickHouse as a time-series database?
- Can I use ClickHouse as a key-value storage?

View File

@ -63,7 +63,7 @@ $ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FOR
По умолчанию, в качестве формата, используется формат PrettyCompact (красивые таблички). Вы можете изменить формат с помощью секции FORMAT запроса, или с помощью указания `\G` на конце запроса, с помощью аргумента командной строки `--format` или `--vertical`, или с помощью конфигурационного файла клиента.
Чтобы выйти из клиента, нажмите Ctrl+D (или Ctrl+C), или наберите вместо запроса одно из: «exit», «quit», «logout», «учше», «йгше», «дщпщге», «exit;», «quit;», «logout;», «учшеж», «йгшеж», «дщпщгеж», «q», «й», «q», «Q», «:q», «й», «Й», «Жй»
Чтобы выйти из клиента, нажмите Ctrl+D, или наберите вместо запроса одно из: «exit», «quit», «logout», «учше», «йгше», «дщпщге», «exit;», «quit;», «logout;», «учшеж», «йгшеж», «дщпщгеж», «q», «й», «q», «Q», «:q», «й», «Й», «Жй»
При выполнении запроса, клиент показывает:

View File

@ -0,0 +1,81 @@
# system.replication_queue {#system_tables-replication_queue}
Содержит информацию о задачах из очередей репликации, хранящихся в ZooKeeper, для таблиц семейства `ReplicatedMergeTree`.
Столбцы:
- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных.
- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
- `replica_name` ([String](../../sql-reference/data-types/string.md)) — имя реплики в ZooKeeper. Разные реплики одной и той же таблицы имеют различные имена.
- `position` ([UInt32](../../sql-reference/data-types/int-uint.md)) — позиция задачи в очереди.
- `node_name` ([String](../../sql-reference/data-types/string.md)) — имя узла в ZooKeeper.
- `type` ([String](../../sql-reference/data-types/string.md)) — тип задачи в очереди: `GET_PARTS`, `MERGE_PARTS`, `DETACH_PARTS`, `DROP_PARTS` или `MUTATE_PARTS`.
- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — дата и время отправки задачи на выполнение.
- `required_quorum` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество реплик, ожидающих завершения задачи, с подтверждением о завершении. Этот столбец актуален только для задачи `GET_PARTS`.
- `source_replica` ([String](../../sql-reference/data-types/string.md)) — имя исходной реплики.
- `new_part_name` ([String](../../sql-reference/data-types/string.md)) — имя нового куска.
- `parts_to_merge` ([Array](../../sql-reference/data-types/array.md) ([String](../../sql-reference/data-types/string.md))) — имена кусков, которые требуется смержить или обновить.
- `is_detach` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на присутствие в очереди задачи `DETACH_PARTS`.
- `is_currently_executing` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на выполнение конкретной задачи на данный момент.
- `num_tries` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество неудачных попыток выполнить задачу.
- `last_exception` ([String](../../sql-reference/data-types/string.md)) — текст сообщения о последней возникшей ошибке, если таковые имеются.
- `last_attempt_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — дата и время последней попытки выполнить задачу.
- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество отложенных задач.
- `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — причина, по которой была отложена задача.
- `last_postpone_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — дата и время, когда была отложена задача в последний раз.
- `merge_type` ([String](../../sql-reference/data-types/string.md)) — тип текущего слияния. Пусто, если это мутация.
**Пример**
``` sql
SELECT * FROM system.replication_queue LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
database: merge
table: visits_v2
replica_name: mtgiga001-1t.metrika.yandex.net
position: 15
node_name: queue-0009325559
type: MERGE_PARTS
create_time: 2020-12-07 14:04:21
required_quorum: 0
source_replica: mtgiga001-1t.metrika.yandex.net
new_part_name: 20201130_121373_121384_2
parts_to_merge: ['20201130_121373_121378_1','20201130_121379_121379_0','20201130_121380_121380_0','20201130_121381_121381_0','20201130_121382_121382_0','20201130_121383_121383_0','20201130_121384_121384_0']
is_detach: 0
is_currently_executing: 0
num_tries: 36
last_exception: Code: 226, e.displayText() = DB::Exception: Marks file '/opt/clickhouse/data/merge/visits_v2/tmp_fetch_20201130_121373_121384_2/CounterID.mrk' doesn't exist (version 20.8.7.15 (official build))
last_attempt_time: 2020-12-08 17:35:54
num_postponed: 0
postpone_reason:
last_postpone_time: 1970-01-01 03:00:00
```
**Смотрите также**
- [Управление таблицами ReplicatedMergeTree](../../sql-reference/statements/system.md#query-language-system-replicated)
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/replication_queue) <!--hide-->

View File

@ -199,7 +199,7 @@ SOURCE(ODBC(
ClickHouse получает от ODBC-драйвера информацию о квотировании и квотирует настройки в запросах к драйверу, поэтому имя таблицы нужно указывать в соответствии с регистром имени таблицы в базе данных.
Если у вас есть проблемы с кодировками при использовании Oracle, ознакомьтесь с соответствующим разделом [FAQ](../../../faq/general.md#oracle-odbc-encodings).
Если у вас есть проблемы с кодировками при использовании Oracle, ознакомьтесь с соответствующим разделом [FAQ](../../../faq/integration/oracle-odbc.md).
### Выявленная уязвимость в функционировании ODBC словарей {#vyiavlennaia-uiazvimost-v-funktsionirovanii-odbc-slovarei}

View File

@ -593,6 +593,18 @@ SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-0
Например, `timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600)) = [toDateTime('2012-01-01 12:00:00'), toDateTime('2012-01-01 12:30:00')]`.
Это нужно для поиска хитов, входящих в соответствующий визит.
## toYYYYMM
Переводит дату или дату со временем в число типа UInt32, содержащее номер года и месяца (YYYY * 100 + MM).
## toYYYYMMDD
Переводит дату или дату со временем в число типа UInt32, содержащее номер года, месяца и дня (YYYY * 10000 + MM * 100 + DD).
## toYYYYMMDDhhmmss
Переводит дату или дату со временем в число типа UInt64, содержащее номер года, месяца, дня и время (YYYY * 10000000000 + MM * 100000000 + DD * 1000000 + hh * 10000 + mm * 100 + ss).
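Например (иллюстративный пример):

``` sql
SELECT
    toYYYYMM(toDate('2020-12-18')) AS yyyymm,
    toYYYYMMDD(toDate('2020-12-18')) AS yyyymmdd;
-- yyyymm = 202012, yyyymmdd = 20201218
```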
## formatDateTime {#formatdatetime}
Функция преобразует дату-и-время в строку по заданному шаблону. Важно: шаблон — константное выражение, поэтому использовать разные шаблоны в одной колонке не получится.

View File

@ -59,7 +59,7 @@ Komut satırı dayanmaktadır replxx (benzer readline). Başka bir d
Varsayılan olarak, kullanılan biçim PrettyCompact. Sorgunun biçim yan tümcesinde veya belirterek biçimi değiştirebilirsiniz `\G` sorgunun sonunda, `--format` veya `--vertical` komut satırında veya istemci yapılandırma dosyasını kullanarak bağımsız değişken.
İstemciden çıkmak için Ctrl+D (veya Ctrl+C) tuşlarına basın veya bir sorgu yerine aşağıdakilerden birini girin: “exit”, “quit”, “logout”, “exit;”, “quit;”, “logout;”, “q”, “Q”, “:q”
İstemciden çıkmak için Ctrl+D tuşlarına basın veya bir sorgu yerine aşağıdakilerden birini girin: “exit”, “quit”, “logout”, “exit;”, “quit;”, “logout;”, “q”, “Q”, “:q”
Bir sorguyu işlerken, istemci şunları gösterir:

View File

@ -9,19 +9,21 @@ toc_title: "\u5176\u4ED6"
## ATTACH {#attach}
这个查询是完全一样的 `CREATE`,但是
与`CREATE`类似,但有所区别
- 而不是这个词 `CREATE` 它使用这个词 `ATTACH`.
- 查询不会在磁盘上创建数据,但假定数据已经在适当的位置,只是将有关表的信息添加到服务器。
执行附加查询后,服务器将知道表的存在
- 使用关键词 `ATTACH`
- 查询不会在磁盘上创建数据。但会假定数据已经在对应位置存放,同时将与表相关的信息添加到服务器。
执行 `ATTACH` 查询后,服务器将知道表已经被创建
如果表之前已分离 (`DETACH`),意味着其结构是已知的,可以使用速记而不限定该结构
如果表之前已分离 (`DETACH`),意味着其结构是已知的,可以使用简要的写法来建立表即不需要定义表结构的Schema细节
``` sql
ATTACH TABLE [IF NOT EXISTS] [db.]name [ON CLUSTER cluster]
```
启动服务器时使用此查询。 服务器将表元数据作为文件存储 `ATTACH` 查询,它只是在启动时运行(除了在服务器上显式创建的系统表)。
启动服务器时会自动触发此查询。
服务器将表的元数据作为文件存储 `ATTACH` 查询,它只是在启动时运行。有些表例外,如系统表,它们是在服务器上显式指定的。
## CHECK TABLE {#check-table}
@ -31,13 +33,12 @@ ATTACH TABLE [IF NOT EXISTS] [db.]name [ON CLUSTER cluster]
CHECK TABLE [db.]name
```
`CHECK TABLE` 查询将实际文件大小与存储在服务器上的预期值进行比较。 如果文件大小与存储的值不匹配,则表示数据已损坏。 例如,这可能是由查询执行期间的系统崩溃引起的。
`CHECK TABLE` 查询会比较存储在服务器上的实际文件大小与预期值。 如果文件大小与存储的值不匹配,则表示数据已损坏。 例如,这可能是由查询执行期间的系统崩溃引起的。
查询响应包含 `result` 具有单行的列。 该行的值为
[布尔值](../../sql-reference/data-types/boolean.md) 类型:
查询返回一行结果,列名为 `result`, 该行的值为 [布尔值](../../sql-reference/data-types/boolean.md) 类型:
- 0-表中的数据已损坏
- 1-数据保持完整性
- 0-表中的数据已损坏
- 1-数据保持完整性
`CHECK TABLE` 查询支持下表引擎:
@ -56,13 +57,14 @@ CHECK TABLE [db.]name
如果表已损坏,则可以将未损坏的数据复制到另一个表。 要做到这一点:
1. 创建一个与损坏的表结构相同的新表。 要做到这一点,请执行查询 `CREATE TABLE <new_table_name> AS <damaged_table_name>`.
1. 创建一个与损坏的表结构相同的新表。 请执行查询 `CREATE TABLE <new_table_name> AS <damaged_table_name>`.
2. 将 [max_threads](../../operations/settings/settings.md#settings-max_threads) 值设置为1以在单个线程中处理下一个查询。 要这样做,请运行查询 `SET max_threads = 1`.
3. 执行查询 `INSERT INTO <new_table_name> SELECT * FROM <damaged_table_name>`. 此请求将未损坏的数据从损坏的表复制到另一个表。 只有损坏部分之前的数据才会被复制。
4. 重新启动 `clickhouse-client` 以重置 `max_threads` 值。
## DESCRIBE TABLE {#misc-describe-table}
查看表的描述信息返回各列的Schema语法如下
``` sql
DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
```
@ -73,24 +75,25 @@ DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
- `type`— 列的类型。
- `default_type` — [默认表达式](create.md#create-default-values) (`DEFAULT`, `MATERIALIZED``ALIAS`)中使用的子句。 如果没有指定默认表达式,则列包含一个空字符串。
- `default_expression``DEFAULT` 子句中指定的值。
- `comment_expression` — 注释。
- `comment_expression` — 注释信息
嵌套数据结构以 “expanded” 格式输出。 每列分别显示,列名后加点号。
## DETACH {#detach}
从服务器中删除有关 name 表的信息。 服务器停止了解该表的存在。
从服务器中删除目标表信息(删除对象是表), 执行查询后,服务器视作该表已经不存在。
``` sql
DETACH TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
```
这不会删除表的数据或元数据。 在下一次服务器启动时,服务器将读取元数据并再次查找该表。
同样,可以使用 `ATTACH` 查询重新连接一个 “detached” 的表(系统表除外,没有为它们存储元数据)。
也可以不停止服务器的情况下,使用前面介绍的 `ATTACH` 查询来重新关联该表(系统表除外,没有为它们存储元数据)。
## DROP {#drop}
删除已经存在的实体。如果指定 `IF EXISTS` 则如果实体不存在,则不返回错误。
建议使用时添加 `IF EXISTS` 修饰符。
## DROP DATABASE {#drop-database}
@ -135,7 +138,7 @@ DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name]
删除角色。
已删除的角色将从授予该角色的所有实体撤销
同时该角色所拥有的权限也会被收回
语法:
@ -199,6 +202,8 @@ EXISTS [TEMPORARY] [TABLE|DICTIONARY] [db.]name [INTO OUTFILE filename] [FORMAT
## KILL QUERY {#kill-query-statement}
``` sql
KILL QUERY [ON CLUSTER cluster]
WHERE <where expression to SELECT FROM system.processes query>
@ -219,16 +224,17 @@ KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90'
KILL QUERY WHERE user='username' SYNC
```
只读用户只能停止自己的查询。
只读用户只能停止自己提交的查询。
默认情况下,使用异步版本的查询 (`ASYNC`),不等待确认查询已停止。
默认情况下,使用异步版本的查询 (`ASYNC`),不需要等待确认查询已停止。
同步版本 (`SYNC`)等待所有查询停止,并在停止时显示有关每个进程的信息。
响应包含 `kill_status` 列,该列可以采用以下值:
而相对的,终止同步版本 (`SYNC`)的查询会显示每步停止时间。
返回信息包含 `kill_status` 列,该列可以采用以下值:
1. finished 查询已成功终止。
2. waiting 发送查询信号终止后,等待查询结束。
3. 其他值解释为什么查询不能停止。
3. 其他值,会解释为什么查询不能停止。
测试查询 (`TEST`)仅检查用户的权限,并显示要停止的查询列表。

View File

@ -14,7 +14,7 @@ toc_title: LIMIT
## LIMIT … WITH TIES 修饰符 {#limit-with-ties}
如果为 `LIMIT n[,m]` 设置了 `WITH TIES` ,并且声明了 `ORDER BY expr_list`, you will get in result first `n` or `n,m` rows and all rows with same `ORDER BY` fields values equal to row at position `n` for `LIMIT n` and `m` for `LIMIT n,m`.
如果为 `LIMIT n[,m]` 设置了 `WITH TIES` ,并且声明了 `ORDER BY expr_list`, 除了得到无修饰符的结果(正常情况下的 `limit n`, 前n行数据), 还会返回与第`n`行具有相同排序字段的行(即如果第n+1行的字段与第n行拥有相同的排序字段,同样返回该结果)。
此修饰符可以与: [ORDER BY … WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill) 组合使用.
@ -38,7 +38,7 @@ SELECT * FROM (
└───┘
```
单子执行了 `WITH TIES` 修饰符后
添加 `WITH TIES` 修饰符后
``` sql
SELECT * FROM (
@ -59,4 +59,8 @@ SELECT * FROM (
└───┘
```
cause row number 6 have same value “2” for field `n` as row number 5
虽然指定了`LIMIT 5`, 但第6行的`n`字段值为2与第5行相同因此也作为满足条件的记录返回。
简而言之,该修饰符可理解为是否增加“并列行”的数据。
``` sql
``` sql

View File

@ -2335,7 +2335,7 @@ public:
"Suggestion limit for how many databases, tables and columns to fetch.")
("multiline,m", "multiline")
("multiquery,n", "multiquery")
("queries-file,qf", po::value<std::string>(), "file path with queries to execute")
("queries-file", po::value<std::string>(), "file path with queries to execute")
("format,f", po::value<std::string>(), "default output format")
("testmode,T", "enable test hints in comments")
("ignore-error", "do not stop processing in multiquery mode")

View File

@ -115,7 +115,7 @@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques
std::string name = schema_name.empty() ? backQuoteIfNeed(table_name) : backQuoteIfNeed(schema_name) + "." + backQuoteIfNeed(table_name);
WriteBufferFromOwnString buf;
std::string input = "SELECT * FROM " + name + " WHERE 1 = 0";
ParserQueryWithOutput parser;
ParserQueryWithOutput parser(input.data() + input.size());
ASTPtr select = parseQuery(parser, input.data(), input.data() + input.size(), "", context_settings.max_query_size, context_settings.max_parser_depth);
IAST::FormatSettings settings(buf, true);

View File

@ -139,6 +139,28 @@ void setupTmpPath(Poco::Logger * log, const std::string & path)
}
}
int waitServersToFinish(std::vector<DB::ProtocolServerAdapter> & servers, size_t seconds_to_wait)
{
const int sleep_max_ms = 1000 * seconds_to_wait;
const int sleep_one_ms = 100;
int sleep_current_ms = 0;
int current_connections = 0;
while (sleep_current_ms < sleep_max_ms)
{
current_connections = 0;
for (auto & server : servers)
{
server.stop();
current_connections += server.currentConnections();
}
if (!current_connections)
break;
sleep_current_ms += sleep_one_ms;
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms));
}
return current_connections;
}
}
namespace DB
@ -366,7 +388,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_WARNING(log, "Server was built in debug mode. It will work slowly.");
#endif
#if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER)
#if defined(SANITIZER)
LOG_WARNING(log, "Server was built with sanitizer. It will work slowly.");
#endif
@ -794,8 +816,29 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_DEBUG(log, "Shut down storages.");
for (auto & server : servers_to_start_before_tables)
server.stop();
if (!servers_to_start_before_tables.empty())
{
LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish.");
int current_connections = 0;
for (auto & server : servers_to_start_before_tables)
{
server.stop();
current_connections += server.currentConnections();
}
if (current_connections)
LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections);
else
LOG_INFO(log, "Closed all listening sockets.");
if (current_connections > 0)
current_connections = waitServersToFinish(servers_to_start_before_tables, config().getInt("shutdown_wait_unfinished", 5));
if (current_connections)
LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections);
else
LOG_INFO(log, "Closed connections to servers for tables.");
}
/** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.
* At this moment, no one could own shared part of Context.
@ -1167,24 +1210,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->getProcessList().killAllQueries();
if (current_connections)
{
const int sleep_max_ms = 1000 * config().getInt("shutdown_wait_unfinished", 5);
const int sleep_one_ms = 100;
int sleep_current_ms = 0;
while (sleep_current_ms < sleep_max_ms)
{
current_connections = 0;
for (auto & server : servers)
{
server.stop();
current_connections += server.currentConnections();
}
if (!current_connections)
break;
sleep_current_ms += sleep_one_ms;
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms));
}
}
current_connections = waitServersToFinish(servers, config().getInt("shutdown_wait_unfinished", 5));
if (current_connections)
LOG_INFO(log, "Closed connections. But {} remain."

View File

@ -48,97 +48,121 @@
-->
</logger>
<send_crash_reports>
<!-- Changing <enabled> to true allows sending crash reports to -->
<!-- the ClickHouse core developers team via Sentry https://sentry.io -->
<!-- Doing so at least in pre-production environments is highly appreciated -->
<enabled>false</enabled>
<!-- Change <anonymize> to true if you don't feel comfortable attaching the server hostname to the crash report -->
<anonymize>false</anonymize>
<!-- Default endpoint should be changed to different Sentry DSN only if you have -->
<!-- some in-house engineers or hired consultants who're going to debug ClickHouse issues for you -->
<endpoint>https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277</endpoint>
</send_crash_reports>
<!-- It is the name that will be shown in the clickhouse-client.
By default, anything with "production" will be highlighted in red in query prompt.
-->
<!--display_name>production</display_name-->
<!--display_name>production</display_name--> <!-- It is the name that will be shown in the client -->
<!-- Port for HTTP API. See also 'https_port' for secure connections.
This interface is also used by ODBC and JDBC drivers (DataGrip, Dbeaver, ...)
and by most of web interfaces (embedded UI, Grafana, Redash, ...).
-->
<http_port>8123</http_port>
<!-- Port for interaction by native protocol with:
- clickhouse-client and other native ClickHouse tools (clickhouse-benchmark, clickhouse-copier);
- clickhouse-server with other clickhouse-servers for distributed query processing;
- ClickHouse drivers and applications supporting native protocol
(this protocol is also informally called as "the TCP protocol");
See also 'tcp_port_secure' for secure connections.
-->
<tcp_port>9000</tcp_port>
<!-- Compatibility with MySQL protocol.
ClickHouse will pretend to be MySQL for applications connecting to this port.
-->
<mysql_port>9004</mysql_port>
<!-- For HTTPS and SSL over native protocol. -->
<!--
<https_port>8443</https_port>
<tcp_port_secure>9440</tcp_port_secure>
<!-- Compatibility with PostgreSQL protocol.
ClickHouse will pretend to be PostgreSQL for applications connecting to this port.
-->
<!-- <postgresql_port>9005</postgresql_port> -->
<!-- TCP with PROXY protocol (PROXY header sent for every connection) -->
<!--
<tcp_with_proxy_port>9010</tcp_with_proxy_port>
<!-- HTTP API with TLS (HTTPS).
You have to configure certificate to enable this interface.
See the openSSL section below.
-->
<!-- <https_port>8443</https_port> -->
<!-- Used with https_port and tcp_port_secure. Full ssl options list: https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h#L71 -->
<openSSL>
<server> <!-- Used for https server AND secure tcp port -->
<!-- openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/clickhouse-server/server.key -out /etc/clickhouse-server/server.crt -->
<certificateFile>/etc/clickhouse-server/server.crt</certificateFile>
<privateKeyFile>/etc/clickhouse-server/server.key</privateKeyFile>
<!-- openssl dhparam -out /etc/clickhouse-server/dhparam.pem 4096 -->
<dhParamsFile>/etc/clickhouse-server/dhparam.pem</dhParamsFile>
<verificationMode>none</verificationMode>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
</server>
<client> <!-- Used for connecting to https dictionary source and secured Zookeeper communication -->
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
<!-- Use for self-signed: <verificationMode>none</verificationMode> -->
<invalidCertificateHandler>
<!-- Use for self-signed: <name>AcceptCertificateHandler</name> -->
<name>RejectCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
<!-- Default root page on http[s] server. For example load UI from https://tabix.io/ when opening http://localhost:8123 -->
<!--
<http_server_default_response><![CDATA[<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>]]></http_server_default_response>
<!-- Native interface with TLS.
You have to configure certificate to enable this interface.
See the openSSL section below.
-->
<!-- <tcp_port_secure>9440</tcp_port_secure> -->
<!-- Port for communication between replicas. Used for data exchange. -->
<!-- Native interface wrapped with PROXYv1 protocol
PROXYv1 header sent for every connection.
ClickHouse will extract information about proxy-forwarded client address from the header.
-->
<!-- <tcp_with_proxy_port>9011</tcp_with_proxy_port> -->
<!-- Port for communication between replicas. Used for data exchange.
It provides low-level data access between servers.
This port should not be accessible from untrusted networks.
See also 'interserver_http_credentials'.
Data transferred over connections to this port should not go through untrusted networks.
See also 'interserver_https_port'.
-->
<interserver_http_port>9009</interserver_http_port>
<!-- Port for communication between replicas with TLS.
You have to configure certificate to enable this interface.
See the openSSL section below.
See also 'interserver_http_credentials'.
-->
<!-- <interserver_https_port>9010</interserver_https_port> -->
<!-- Hostname that is used by other replicas to request this server.
If not specified, then it is determined analogously to the 'hostname -f' command.
This setting could be used to switch replication to another network interface.
This setting could be used to switch replication to another network interface
(the server may be connected to multiple networks via multiple addresses)
-->
<!--
<interserver_http_host>example.yandex.ru</interserver_http_host>
-->
<!-- Listen specified host. use :: (wildcard IPv6 address), if you want to accept connections both with IPv4 and IPv6 from everywhere. -->
<!-- You can specify credentials for authentication between replicas.
This is required when interserver_https_port is accessible from untrusted networks,
and also recommended to avoid SSRF attacks from possibly compromised services in your network.
-->
<!--<interserver_http_credentials>
<user>interserver</user>
<password></password>
</interserver_http_credentials>-->
<!-- Listen specified address.
Use :: (wildcard IPv6 address), if you want to accept connections both with IPv4 and IPv6 from everywhere.
Notes:
If you open connections from wildcard address, make sure that at least one of the following measures applied:
- server is protected by firewall and not accessible from untrusted networks;
- all users are restricted to subset of network addresses (see users.xml);
- all users have strong passwords, only secure (TLS) interfaces are accessible, or connections are only made via TLS interfaces.
- users without password have readonly access.
See also: https://www.shodan.io/search?query=clickhouse
-->
<!-- <listen_host>::</listen_host> -->
<!-- Same for hosts with disabled ipv6: -->
<!-- Same for hosts without support for IPv6: -->
<!-- <listen_host>0.0.0.0</listen_host> -->
<!-- Default values - try listen localhost on ipv4 and ipv6: -->
<!-- Default values - try listen localhost on IPv4 and IPv6. -->
<!--
<listen_host>::1</listen_host>
<listen_host>127.0.0.1</listen_host>
-->
<!-- Don't exit if ipv6 or ipv4 unavailable, but listen_host with this protocol specified -->
<!-- Don't exit if IPv6 or IPv4 networks are unavailable while trying to listen. -->
<!-- <listen_try>0</listen_try> -->
<!-- Allow listen on same address:port -->
<!-- Allow multiple servers to listen on the same address:port. This is not recommended.
-->
<!-- <listen_reuse_port>0</listen_reuse_port> -->
<!-- <listen_backlog>64</listen_backlog> -->
<max_connections>4096</max_connections>
<!-- For 'Connection: keep-alive' in HTTP 1.1 -->
<keep_alive_timeout>3</keep_alive_timeout>
<!-- gRPC protocol (see src/Server/grpc_protos/clickhouse_grpc.proto for the API) -->
@ -172,6 +196,43 @@
<verbose_logs>false</verbose_logs>
</grpc>
<!-- Used with https_port and tcp_port_secure. Full ssl options list: https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h#L71 -->
<openSSL>
<server> <!-- Used for https server AND secure tcp port -->
<!-- openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/clickhouse-server/server.key -out /etc/clickhouse-server/server.crt -->
<certificateFile>/etc/clickhouse-server/server.crt</certificateFile>
<privateKeyFile>/etc/clickhouse-server/server.key</privateKeyFile>
<!-- dhparams are optional. You can delete the <dhParamsFile> element.
To generate dhparams, use the following command:
openssl dhparam -out /etc/clickhouse-server/dhparam.pem 4096
Only file format with BEGIN DH PARAMETERS is supported.
-->
<dhParamsFile>/etc/clickhouse-server/dhparam.pem</dhParamsFile>
<verificationMode>none</verificationMode>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
</server>
<client> <!-- Used for connecting to https dictionary source and secured Zookeeper communication -->
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
<!-- Use for self-signed: <verificationMode>none</verificationMode> -->
<invalidCertificateHandler>
<!-- Use for self-signed: <name>AcceptCertificateHandler</name> -->
<name>RejectCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
<!-- Default root page on http[s] server. For example load UI from https://tabix.io/ when opening http://localhost:8123 -->
<!--
<http_server_default_response><![CDATA[<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>]]></http_server_default_response>
-->
<!-- Maximum number of concurrent queries. -->
<max_concurrent_queries>100</max_concurrent_queries>
@ -605,7 +666,7 @@
event_date + INTERVAL 1 WEEK
event_date + INTERVAL 7 DAY DELETE
event_date + INTERVAL 2 WEEK TO DISK 'bbb'
<ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
-->
@ -887,6 +948,18 @@
</http_handlers>
-->
<send_crash_reports>
<!-- Changing <enabled> to true allows sending crash reports to -->
<!-- the ClickHouse core developers team via Sentry https://sentry.io -->
<!-- Doing so at least in pre-production environments is highly appreciated -->
<enabled>false</enabled>
<!-- Change <anonymize> to true if you don't feel comfortable attaching the server hostname to the crash report -->
<anonymize>false</anonymize>
<!-- Default endpoint should be changed to different Sentry DSN only if you have -->
<!-- some in-house engineers or hired consultants who're going to debug ClickHouse issues for you -->
<endpoint>https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277</endpoint>
</send_crash_reports>
<!-- Uncomment to disable ClickHouse internal DNS caching. -->
<!-- <disable_internal_dns_cache>1</disable_internal_dns_cache> -->
</yandex>

View File

@ -392,9 +392,12 @@ bool ContextAccess::checkAccessImpl2(const AccessFlags & flags, const Args &...
if (!getUser())
return access_denied("User has been dropped", ErrorCodes::UNKNOWN_USER);
/// If the current user was allowed to create a temporary table
/// then he is allowed to do with it whatever he wants.
if ((sizeof...(args) >= 2) && (getDatabase(args...) == DatabaseCatalog::TEMPORARY_DATABASE))
/// Access to temporary tables is controlled in an unusual way, not like normal tables.
/// Creating temporary tables is controlled by the AccessType::CREATE_TEMPORARY_TABLES grant,
/// and other grants are considered as always given.
/// The DatabaseCatalog class won't resolve StorageID for temporary tables
/// which shouldn't be accessed.
if (getDatabase(args...) == DatabaseCatalog::TEMPORARY_DATABASE)
return access_granted();
auto acs = getAccessRightsWithImplicit();

View File

@ -156,6 +156,25 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
{
const String & setting_name = change.name;
if (setting_name == "profile")
{
/// TODO Check profile settings in Context::setProfile(...), not here. It will be backward incompatible.
const String & profile_name = change.value.safeGet<String>();
const auto & profile_settings_changes = manager->getProfileSettings(profile_name);
try
{
/// NOTE We cannot use CLAMP_ON_VIOLATION here, because we cannot modify elements of profile_settings_changes
for (auto change_copy : *profile_settings_changes)
checkImpl(current_settings, change_copy, THROW_ON_VIOLATION);
}
catch (Exception & e)
{
e.addMessage(", while trying to set settings profile {}", profile_name);
throw;
}
return true;
}
bool cannot_cast;
auto cast_value = [&](const Field & x) -> Field
{

View File

@ -33,7 +33,7 @@ struct AvgFraction
/// Allow division by zero as sometimes we need to return NaN.
/// Invoked only if either Numerator or Denominator is Decimal.
Float64 NO_SANITIZE_UNDEFINED divideIfAnyDecimal(UInt32 num_scale, UInt32 denom_scale) const
Float64 NO_SANITIZE_UNDEFINED divideIfAnyDecimal(UInt32 num_scale, UInt32 denom_scale [[maybe_unused]]) const
{
if constexpr (IsDecimalNumber<Numerator> && IsDecimalNumber<Denominator>)
{

View File

@ -18,6 +18,7 @@
#include <Poco/String.h>
#include "registerAggregateFunctions.h"
#include <Functions/FunctionFactory.h>
namespace DB
{
@ -135,12 +136,17 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
return combinator->transformAggregateFunction(nested_function, out_properties, argument_types, parameters);
}
String extra_info;
if (FunctionFactory::instance().hasNameOrAlias(name))
extra_info = ". There is an ordinary function with the same name, but aggregate function is expected here";
auto hints = this->getHints(name);
if (!hints.empty())
throw Exception(fmt::format("Unknown aggregate function {}. Maybe you meant: {}", name, toString(hints)),
ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION);
throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION,
"Unknown aggregate function {}{}. Maybe you meant: {}", name, extra_info, toString(hints));
else
throw Exception(fmt::format("Unknown aggregate function {}", name), ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION);
throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Unknown aggregate function {}{}", name, extra_info);
}

View File

@ -0,0 +1,32 @@
#include <AggregateFunctions/AggregateFunctionCombinatorFactory.h>
#include <AggregateFunctions/AggregateFunctionSimpleState.h>
namespace DB
{
namespace
{
class AggregateFunctionCombinatorSimpleState final : public IAggregateFunctionCombinator
{
public:
String getName() const override { return "SimpleState"; }
DataTypes transformArguments(const DataTypes & arguments) const override { return arguments; }
AggregateFunctionPtr transformAggregateFunction(
const AggregateFunctionPtr & nested_function,
const AggregateFunctionProperties &,
const DataTypes & arguments,
const Array & params) const override
{
return std::make_shared<AggregateFunctionSimpleState>(nested_function, arguments, params);
}
};
}
void registerAggregateFunctionCombinatorSimpleState(AggregateFunctionCombinatorFactory & factory)
{
factory.registerCombinator(std::make_shared<AggregateFunctionCombinatorSimpleState>());
}
}

View File

@ -0,0 +1,77 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
#include <DataTypes/DataTypeFactory.h>
namespace DB
{
/** Not an aggregate function, but an adapter of aggregate functions.
* Aggregate functions with the `SimpleState` suffix are almost identical to the corresponding ones,
* except the return type becomes DataTypeCustomSimpleAggregateFunction.
*/
class AggregateFunctionSimpleState final : public IAggregateFunctionHelper<AggregateFunctionSimpleState>
{
private:
AggregateFunctionPtr nested_func;
DataTypes arguments;
Array params;
public:
AggregateFunctionSimpleState(AggregateFunctionPtr nested_, const DataTypes & arguments_, const Array & params_)
: IAggregateFunctionHelper<AggregateFunctionSimpleState>(arguments_, params_)
, nested_func(nested_)
, arguments(arguments_)
, params(params_)
{
}
String getName() const override { return nested_func->getName() + "SimpleState"; }
DataTypePtr getReturnType() const override
{
DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(nested_func);
// Need to make a clone because it'll be customized.
auto storage_type = DataTypeFactory::instance().get(nested_func->getReturnType()->getName());
DataTypeCustomNamePtr custom_name
= std::make_unique<DataTypeCustomSimpleAggregateFunction>(nested_func, DataTypes{nested_func->getReturnType()}, params);
storage_type->setCustomization(std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr));
return storage_type;
}
void create(AggregateDataPtr place) const override { nested_func->create(place); }
void destroy(AggregateDataPtr place) const noexcept override { nested_func->destroy(place); }
bool hasTrivialDestructor() const override { return nested_func->hasTrivialDestructor(); }
size_t sizeOfData() const override { return nested_func->sizeOfData(); }
size_t alignOfData() const override { return nested_func->alignOfData(); }
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
nested_func->add(place, columns, row_num, arena);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { nested_func->merge(place, rhs, arena); }
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { nested_func->serialize(place, buf); }
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
{
nested_func->deserialize(place, buf, arena);
}
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override
{
nested_func->insertResultInto(place, to, arena);
}
bool allocatesMemoryInArena() const override { return nested_func->allocatesMemoryInArena(); }
AggregateFunctionPtr getNestedFunction() const { return nested_func; }
};
}

View File

@ -47,6 +47,7 @@ class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorArray(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorForEach(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorSimpleState(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorState(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorMerge(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorNull(AggregateFunctionCombinatorFactory &);
@ -104,6 +105,7 @@ void registerAggregateFunctions()
registerAggregateFunctionCombinatorIf(factory);
registerAggregateFunctionCombinatorArray(factory);
registerAggregateFunctionCombinatorForEach(factory);
registerAggregateFunctionCombinatorSimpleState(factory);
registerAggregateFunctionCombinatorState(factory);
registerAggregateFunctionCombinatorMerge(factory);
registerAggregateFunctionCombinatorNull(factory);

View File

@ -41,6 +41,7 @@ SRCS(
AggregateFunctionRetention.cpp
AggregateFunctionSequenceMatch.cpp
AggregateFunctionSimpleLinearRegression.cpp
AggregateFunctionSimpleState.cpp
AggregateFunctionState.cpp
AggregateFunctionStatistics.cpp
AggregateFunctionStatisticsSimple.cpp

View File

@ -88,6 +88,10 @@ if (USE_AWS_S3)
add_headers_and_sources(dbms Disks/S3)
endif()
if (USE_HDFS)
add_headers_and_sources(dbms Storages/HDFS)
endif()
list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD})
list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON})
@ -389,8 +393,8 @@ if (USE_GRPC)
endif()
if (USE_HDFS)
target_link_libraries (clickhouse_common_io PUBLIC ${HDFS3_LIBRARY})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${HDFS3_INCLUDE_DIR})
dbms_target_link_libraries(PRIVATE ${HDFS3_LIBRARY})
dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${HDFS3_INCLUDE_DIR})
endif()
if (USE_AWS_S3)

View File

@ -5,8 +5,9 @@
#include <Poco/Net/StreamSocket.h>
#include <Common/Throttler.h>
#include <Common/config.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Core/Block.h>
#include <Core/Defines.h>
#include <IO/Progress.h>

241
src/Columns/ColumnMap.cpp Normal file
View File

@ -0,0 +1,241 @@
#include <Columns/ColumnMap.h>
#include <Columns/IColumnImpl.h>
#include <DataStreams/ColumnGathererStream.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <ext/map.h>
#include <ext/range.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Common/WeakHash.h>
#include <Core/Field.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
}
std::string ColumnMap::getName() const
{
WriteBufferFromOwnString res;
const auto & nested_tuple = getNestedData();
res << "Map(" << nested_tuple.getColumn(0).getName()
<< ", " << nested_tuple.getColumn(1).getName() << ")";
return res.str();
}
ColumnMap::ColumnMap(MutableColumnPtr && nested_)
: nested(std::move(nested_))
{
const auto * column_array = typeid_cast<const ColumnArray *>(nested.get());
if (!column_array)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnMap can be created only from array of tuples");
const auto * column_tuple = typeid_cast<const ColumnTuple *>(column_array->getDataPtr().get());
if (!column_tuple)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnMap can be created only from array of tuples");
if (column_tuple->getColumns().size() != 2)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnMap should contain only 2 subcolumns: keys and values");
for (const auto & column : column_tuple->getColumns())
if (isColumnConst(*column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ColumnMap cannot have ColumnConst as its element");
}
MutableColumnPtr ColumnMap::cloneEmpty() const
{
return ColumnMap::create(nested->cloneEmpty());
}
MutableColumnPtr ColumnMap::cloneResized(size_t new_size) const
{
return ColumnMap::create(nested->cloneResized(new_size));
}
Field ColumnMap::operator[](size_t n) const
{
auto array = DB::get<Array>((*nested)[n]);
return Map(std::make_move_iterator(array.begin()), std::make_move_iterator(array.end()));
}
void ColumnMap::get(size_t n, Field & res) const
{
const auto & offsets = getNestedColumn().getOffsets();
size_t offset = offsets[n - 1];
size_t size = offsets[n] - offsets[n - 1];
res = Map(size);
auto & map = DB::get<Map &>(res);
for (size_t i = 0; i < size; ++i)
getNestedData().get(offset + i, map[i]);
}
StringRef ColumnMap::getDataAt(size_t) const
{
throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
void ColumnMap::insertData(const char *, size_t)
{
throw Exception("Method insertData is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
void ColumnMap::insert(const Field & x)
{
const auto & map = DB::get<const Map &>(x);
nested->insert(Array(map.begin(), map.end()));
}
void ColumnMap::insertDefault()
{
nested->insertDefault();
}
void ColumnMap::popBack(size_t n)
{
nested->popBack(n);
}
StringRef ColumnMap::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
return nested->serializeValueIntoArena(n, arena, begin);
}
const char * ColumnMap::deserializeAndInsertFromArena(const char * pos)
{
return nested->deserializeAndInsertFromArena(pos);
}
void ColumnMap::updateHashWithValue(size_t n, SipHash & hash) const
{
nested->updateHashWithValue(n, hash);
}
void ColumnMap::updateWeakHash32(WeakHash32 & hash) const
{
nested->updateWeakHash32(hash);
}
void ColumnMap::updateHashFast(SipHash & hash) const
{
nested->updateHashFast(hash);
}
void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
nested->insertRangeFrom(
assert_cast<const ColumnMap &>(src).getNestedColumn(),
start, length);
}
ColumnPtr ColumnMap::filter(const Filter & filt, ssize_t result_size_hint) const
{
auto filtered = nested->filter(filt, result_size_hint);
return ColumnMap::create(filtered);
}
ColumnPtr ColumnMap::permute(const Permutation & perm, size_t limit) const
{
auto permuted = nested->permute(perm, limit);
return ColumnMap::create(std::move(permuted));
}
ColumnPtr ColumnMap::index(const IColumn & indexes, size_t limit) const
{
auto res = nested->index(indexes, limit);
return ColumnMap::create(std::move(res));
}
ColumnPtr ColumnMap::replicate(const Offsets & offsets) const
{
auto replicated = nested->replicate(offsets);
return ColumnMap::create(std::move(replicated));
}
MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & selector) const
{
auto scattered_columns = nested->scatter(num_columns, selector);
MutableColumns res;
res.reserve(num_columns);
for (auto && scattered : scattered_columns)
res.push_back(ColumnMap::create(std::move(scattered)));
return res;
}
int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
const auto & rhs_map = assert_cast<const ColumnMap &>(rhs);
return nested->compareAt(n, m, rhs_map.getNestedColumn(), nan_direction_hint);
}
void ColumnMap::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnMap>(assert_cast<const ColumnMap &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
void ColumnMap::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
nested->getPermutation(reverse, limit, nan_direction_hint, res);
}
void ColumnMap::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const
{
nested->updatePermutation(reverse, limit, nan_direction_hint, res, equal_range);
}
void ColumnMap::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
void ColumnMap::reserve(size_t n)
{
nested->reserve(n);
}
size_t ColumnMap::byteSize() const
{
return nested->byteSize();
}
size_t ColumnMap::allocatedBytes() const
{
return nested->allocatedBytes();
}
void ColumnMap::protect()
{
nested->protect();
}
void ColumnMap::getExtremes(Field & min, Field & max) const
{
nested->getExtremes(min, max);
}
void ColumnMap::forEachSubcolumn(ColumnCallback callback)
{
nested->forEachSubcolumn(callback);
}
bool ColumnMap::structureEquals(const IColumn & rhs) const
{
if (const auto * rhs_map = typeid_cast<const ColumnMap *>(&rhs))
return nested->structureEquals(*rhs_map->nested);
return false;
}
}

92
src/Columns/ColumnMap.h Normal file
View File

@ -0,0 +1,92 @@
#pragma once
#include <Core/Block.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnTuple.h>
namespace DB
{
/** Column that stores a nested Array(Tuple(key, value)) column.
*/
class ColumnMap final : public COWHelper<IColumn, ColumnMap>
{
private:
friend class COWHelper<IColumn, ColumnMap>;
WrappedPtr nested;
explicit ColumnMap(MutableColumnPtr && nested_);
ColumnMap(const ColumnMap &) = default;
public:
/** Create immutable column using immutable arguments. These arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWHelper<IColumn, ColumnMap>;
static Ptr create(const ColumnPtr & keys, const ColumnPtr & values, const ColumnPtr & offsets)
{
auto nested_column = ColumnArray::create(ColumnTuple::create(Columns{keys, values}), offsets);
return ColumnMap::create(nested_column);
}
static Ptr create(const ColumnPtr & column) { return ColumnMap::create(column->assumeMutable()); }
static Ptr create(ColumnPtr && arg) { return create(arg); }
template <typename Arg, typename = typename std::enable_if<std::is_rvalue_reference<Arg &&>::value>::type>
static MutablePtr create(Arg && arg) { return Base::create(std::forward<Arg>(arg)); }
std::string getName() const override;
const char * getFamilyName() const override { return "Map"; }
TypeIndex getDataType() const override { return TypeIndex::Map; }
MutableColumnPtr cloneEmpty() const override;
MutableColumnPtr cloneResized(size_t size) const override;
size_t size() const override { return nested->size(); }
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
StringRef getDataAt(size_t n) const override;
void insertData(const char * pos, size_t length) override;
void insert(const Field & x) override;
void insertDefault() override;
void popBack(size_t n) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
const ColumnArray & getNestedColumn() const { return assert_cast<const ColumnArray &>(*nested); }
ColumnArray & getNestedColumn() { return assert_cast<ColumnArray &>(*nested); }
const ColumnTuple & getNestedData() const { return assert_cast<const ColumnTuple &>(getNestedColumn().getData()); }
ColumnTuple & getNestedData() { return assert_cast<ColumnTuple &>(getNestedColumn().getData()); }
};
}
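To make the layout concrete: a minimal standalone sketch, using plain STL containers instead of the ColumnArray/ColumnTuple classes above (all names are illustrative), of how a map column can be stored as flattened key and value columns plus per-row end offsets.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    /// Flattened key and value columns shared by all rows.
    std::vector<std::string> keys   = {"a", "b", "c"};
    std::vector<uint64_t>    values = {1, 2, 3};

    /// offsets[i] is where row i ends in the flat arrays:
    /// row 0 holds {"a": 1, "b": 2}, row 1 holds {"c": 3}.
    std::vector<size_t> offsets = {2, 3};

    for (size_t row = 0; row < offsets.size(); ++row)
    {
        size_t begin = row == 0 ? 0 : offsets[row - 1];
        size_t end = offsets[row];
        std::cout << "row " << row << ": {";
        for (size_t i = begin; i < end; ++i)
            std::cout << (i == begin ? "" : ", ") << keys[i] << ": " << values[i];
        std::cout << "}\n";
    }
}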

View File

@ -24,6 +24,7 @@ SRCS(
ColumnFixedString.cpp
ColumnFunction.cpp
ColumnLowCardinality.cpp
ColumnMap.cpp
ColumnNullable.cpp
ColumnString.cpp
ColumnTuple.cpp

View File

@ -26,6 +26,7 @@
#define DISABLE_MREMAP 1
#endif
#include <common/mremap.h>
#include <common/getPageSize.h>
#include <Common/MemoryTracker.h>
#include <Common/Exception.h>
@ -59,7 +60,6 @@
*/
extern const size_t MMAP_THRESHOLD;
static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
namespace DB
@ -194,10 +194,11 @@ private:
void * allocNoTrack(size_t size, size_t alignment)
{
void * buf;
size_t mmap_min_alignment = ::getPageSize();
if (size >= MMAP_THRESHOLD)
{
if (alignment > MMAP_MIN_ALIGNMENT)
if (alignment > mmap_min_alignment)
throw DB::Exception(fmt::format("Too large alignment {}: more than page size when allocating {}.",
ReadableSize(alignment), ReadableSize(size)), DB::ErrorCodes::BAD_ARGUMENTS);

View File

@ -83,10 +83,11 @@ private:
/// Last contiguous chunk of memory.
Chunk * head;
size_t size_in_bytes;
size_t page_size;
static size_t roundUpToPageSize(size_t s)
static size_t roundUpToPageSize(size_t s, size_t page_size)
{
return (s + 4096 - 1) / 4096 * 4096;
return (s + page_size - 1) / page_size * page_size;
}
/// If chunks size is less than 'linear_growth_threshold', then use exponential growth, otherwise - linear growth
@ -113,7 +114,7 @@ private:
}
assert(size_after_grow >= min_next_size);
return roundUpToPageSize(size_after_grow);
return roundUpToPageSize(size_after_grow, page_size);
}
/// Add next contiguous chunk of memory with size not less than specified.
@ -129,7 +130,8 @@ private:
public:
Arena(size_t initial_size_ = 4096, size_t growth_factor_ = 2, size_t linear_growth_threshold_ = 128 * 1024 * 1024)
: growth_factor(growth_factor_), linear_growth_threshold(linear_growth_threshold_),
head(new Chunk(initial_size_, nullptr)), size_in_bytes(head->size())
head(new Chunk(initial_size_, nullptr)), size_in_bytes(head->size()),
page_size(static_cast<size_t>(::getPageSize()))
{
}
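A minimal standalone sketch of the rounding used above, assuming POSIX sysconf(_SC_PAGESIZE) as a stand-in for the getPageSize() helper introduced in this diff.

#include <cstddef>
#include <iostream>
#include <unistd.h>

/// Round an allocation size up to a multiple of the runtime page size.
static size_t roundUpToPageSize(size_t s, size_t page_size)
{
    return (s + page_size - 1) / page_size * page_size;
}

int main()
{
    size_t page_size = static_cast<size_t>(sysconf(_SC_PAGESIZE));
    std::cout << "page size: " << page_size << "\n";
    std::cout << "10000 rounds up to " << roundUpToPageSize(10000, page_size) << "\n";
}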

View File

@ -13,6 +13,8 @@
#include <boost/noncopyable.hpp>
#include <ext/scope_guard.h>
#include <common/getPageSize.h>
#include <Common/Exception.h>
#include <Common/randomSeed.h>
#include <Common/formatReadable.h>
@ -326,8 +328,6 @@ private:
return (x + (rounding - 1)) / rounding * rounding;
}
static constexpr size_t page_size = 4096;
/// Sizes and addresses of allocated memory will be aligned to specified boundary.
static constexpr size_t alignment = 16;
@ -505,6 +505,7 @@ private:
/// If nothing was found and total size of allocated chunks plus required size is lower than maximum,
/// allocate a new chunk.
size_t page_size = static_cast<size_t>(::getPageSize());
size_t required_chunk_size = std::max(min_chunk_size, roundUp(size, page_size));
if (total_chunks_size + required_chunk_size <= max_total_size)
{

View File

@ -16,9 +16,11 @@
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <common/getResource.h>
#include <common/errnoToString.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#define PREPROCESSED_SUFFIX "-preprocessed"
@ -234,7 +236,7 @@ static std::string layerFromHost()
{
utsname buf;
if (uname(&buf))
throw Poco::Exception(std::string("uname failed: ") + std::strerror(errno));
throw Poco::Exception(std::string("uname failed: ") + errnoToString(errno));
std::string layer = numberFromHost(buf.nodename);
if (layer.empty())

View File

@ -529,6 +529,7 @@
M(560, ZSTD_ENCODER_FAILED) \
M(561, ZSTD_DECODER_FAILED) \
M(562, TLD_LIST_NOT_FOUND) \
M(563, CANNOT_READ_MAP_FROM_TEXT) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -93,6 +93,22 @@ String FieldVisitorDump::operator() (const Tuple & x) const
return wb.str();
}
String FieldVisitorDump::operator() (const Map & x) const
{
WriteBufferFromOwnString wb;
wb << "Map_(";
for (auto it = x.begin(); it != x.end(); ++it)
{
if (it != x.begin())
wb << ", ";
wb << applyVisitor(*this, *it);
}
wb << ')';
return wb.str();
}
String FieldVisitorDump::operator() (const AggregateFunctionStateData & x) const
{
WriteBufferFromOwnString wb;
@ -176,6 +192,82 @@ String FieldVisitorToString::operator() (const Tuple & x) const
return wb.str();
}
String FieldVisitorToString::operator() (const Map & x) const
{
WriteBufferFromOwnString wb;
wb << '(';
for (auto it = x.begin(); it != x.end(); ++it)
{
if (it != x.begin())
wb << ", ";
wb << applyVisitor(*this, *it);
}
wb << ')';
return wb.str();
}
void FieldVisitorWriteBinary::operator() (const Null &, WriteBuffer &) const { }
void FieldVisitorWriteBinary::operator() (const UInt64 & x, WriteBuffer & buf) const { DB::writeVarUInt(x, buf); }
void FieldVisitorWriteBinary::operator() (const Int64 & x, WriteBuffer & buf) const { DB::writeVarInt(x, buf); }
void FieldVisitorWriteBinary::operator() (const Float64 & x, WriteBuffer & buf) const { DB::writeFloatBinary(x, buf); }
void FieldVisitorWriteBinary::operator() (const String & x, WriteBuffer & buf) const { DB::writeStringBinary(x, buf); }
void FieldVisitorWriteBinary::operator() (const UInt128 & x, WriteBuffer & buf) const { DB::writeBinary(x, buf); }
void FieldVisitorWriteBinary::operator() (const Int128 & x, WriteBuffer & buf) const { DB::writeVarInt(x, buf); }
void FieldVisitorWriteBinary::operator() (const UInt256 & x, WriteBuffer & buf) const { DB::writeBinary(x, buf); }
void FieldVisitorWriteBinary::operator() (const Int256 & x, WriteBuffer & buf) const { DB::writeBinary(x, buf); }
void FieldVisitorWriteBinary::operator() (const DecimalField<Decimal32> & x, WriteBuffer & buf) const { DB::writeBinary(x.getValue(), buf); }
void FieldVisitorWriteBinary::operator() (const DecimalField<Decimal64> & x, WriteBuffer & buf) const { DB::writeBinary(x.getValue(), buf); }
void FieldVisitorWriteBinary::operator() (const DecimalField<Decimal128> & x, WriteBuffer & buf) const { DB::writeBinary(x.getValue(), buf); }
void FieldVisitorWriteBinary::operator() (const DecimalField<Decimal256> & x, WriteBuffer & buf) const { DB::writeBinary(x.getValue(), buf); }
void FieldVisitorWriteBinary::operator() (const AggregateFunctionStateData & x, WriteBuffer & buf) const
{
DB::writeStringBinary(x.name, buf);
DB::writeStringBinary(x.data, buf);
}
void FieldVisitorWriteBinary::operator() (const Array & x, WriteBuffer & buf) const
{
const size_t size = x.size();
DB::writeBinary(size, buf);
for (size_t i = 0; i < size; ++i)
{
const UInt8 type = x[i].getType();
DB::writeBinary(type, buf);
Field::dispatch([&buf] (const auto & value) { DB::FieldVisitorWriteBinary()(value, buf); }, x[i]);
}
}
void FieldVisitorWriteBinary::operator() (const Tuple & x, WriteBuffer & buf) const
{
const size_t size = x.size();
DB::writeBinary(size, buf);
for (size_t i = 0; i < size; ++i)
{
const UInt8 type = x[i].getType();
DB::writeBinary(type, buf);
Field::dispatch([&buf] (const auto & value) { DB::FieldVisitorWriteBinary()(value, buf); }, x[i]);
}
}
void FieldVisitorWriteBinary::operator() (const Map & x, WriteBuffer & buf) const
{
const size_t size = x.size();
DB::writeBinary(size, buf);
for (size_t i = 0; i < size; ++i)
{
const UInt8 type = x[i].getType();
writeBinary(type, buf);
Field::dispatch([&buf] (const auto & value) { DB::FieldVisitorWriteBinary()(value, buf); }, x[i]);
}
}
FieldVisitorHash::FieldVisitorHash(SipHash & hash_) : hash(hash_) {}
@ -238,6 +330,16 @@ void FieldVisitorHash::operator() (const Tuple & x) const
applyVisitor(*this, elem);
}
void FieldVisitorHash::operator() (const Map & x) const
{
UInt8 type = Field::Types::Map;
hash.update(type);
hash.update(x.size());
for (const auto & elem : x)
applyVisitor(*this, elem);
}
void FieldVisitorHash::operator() (const Array & x) const
{
UInt8 type = Field::Types::Array;

View File

@ -77,6 +77,7 @@ public:
String operator() (const String & x) const;
String operator() (const Array & x) const;
String operator() (const Tuple & x) const;
String operator() (const Map & x) const;
String operator() (const DecimalField<Decimal32> & x) const;
String operator() (const DecimalField<Decimal64> & x) const;
String operator() (const DecimalField<Decimal128> & x) const;
@ -88,6 +89,30 @@ public:
};
class FieldVisitorWriteBinary
{
public:
void operator() (const Null & x, WriteBuffer & buf) const;
void operator() (const UInt64 & x, WriteBuffer & buf) const;
void operator() (const UInt128 & x, WriteBuffer & buf) const;
void operator() (const Int64 & x, WriteBuffer & buf) const;
void operator() (const Int128 & x, WriteBuffer & buf) const;
void operator() (const Float64 & x, WriteBuffer & buf) const;
void operator() (const String & x, WriteBuffer & buf) const;
void operator() (const Array & x, WriteBuffer & buf) const;
void operator() (const Tuple & x, WriteBuffer & buf) const;
void operator() (const Map & x, WriteBuffer & buf) const;
void operator() (const DecimalField<Decimal32> & x, WriteBuffer & buf) const;
void operator() (const DecimalField<Decimal64> & x, WriteBuffer & buf) const;
void operator() (const DecimalField<Decimal128> & x, WriteBuffer & buf) const;
void operator() (const DecimalField<Decimal256> & x, WriteBuffer & buf) const;
void operator() (const AggregateFunctionStateData & x, WriteBuffer & buf) const;
void operator() (const UInt256 & x, WriteBuffer & buf) const;
void operator() (const Int256 & x, WriteBuffer & buf) const;
};
/** Print readable and unique text dump of field type and value. */
class FieldVisitorDump : public StaticVisitor<String>
{
@ -101,6 +126,7 @@ public:
String operator() (const String & x) const;
String operator() (const Array & x) const;
String operator() (const Tuple & x) const;
String operator() (const Map & x) const;
String operator() (const DecimalField<Decimal32> & x) const;
String operator() (const DecimalField<Decimal64> & x) const;
String operator() (const DecimalField<Decimal128> & x) const;
@ -137,6 +163,11 @@ public:
throw Exception("Cannot convert Tuple to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE);
}
T operator() (const Map &) const
{
throw Exception("Cannot convert Map to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE);
}
T operator() (const UInt64 & x) const { return T(x); }
T operator() (const Int64 & x) const { return T(x); }
T operator() (const Int128 & x) const { return T(x); }
@ -226,6 +257,7 @@ public:
void operator() (const String & x) const;
void operator() (const Array & x) const;
void operator() (const Tuple & x) const;
void operator() (const Map & x) const;
void operator() (const DecimalField<Decimal32> & x) const;
void operator() (const DecimalField<Decimal64> & x) const;
void operator() (const DecimalField<Decimal128> & x) const;
@ -268,6 +300,7 @@ public:
bool operator() (String &) const { throw Exception("Cannot sum Strings", ErrorCodes::LOGICAL_ERROR); }
bool operator() (Array &) const { throw Exception("Cannot sum Arrays", ErrorCodes::LOGICAL_ERROR); }
bool operator() (Tuple &) const { throw Exception("Cannot sum Tuples", ErrorCodes::LOGICAL_ERROR); }
bool operator() (Map &) const { throw Exception("Cannot sum Maps", ErrorCodes::LOGICAL_ERROR); }
bool operator() (UInt128 &) const { throw Exception("Cannot sum UUIDs", ErrorCodes::LOGICAL_ERROR); }
bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot sum AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); }

View File

@ -106,6 +106,11 @@ public:
return aliases.count(name) || case_insensitive_aliases.count(name);
}
bool hasNameOrAlias(const String & name) const
{
return getMap().count(name) || getCaseInsensitiveMap().count(name) || isAlias(name);
}
virtual ~IFactoryWithAliases() override {}
private:

View File

@ -8,10 +8,11 @@
#include "MemoryStatisticsOS.h"
#include <common/logger_useful.h>
#include <common/getPageSize.h>
#include <Common/Exception.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/ReadHelpers.h>
#include <common/logger_useful.h>
namespace DB
@ -26,7 +27,6 @@ namespace ErrorCodes
}
static constexpr auto filename = "/proc/self/statm";
static constexpr size_t PAGE_SIZE = 4096;
MemoryStatisticsOS::MemoryStatisticsOS()
{
@ -93,11 +93,12 @@ MemoryStatisticsOS::Data MemoryStatisticsOS::get() const
skipWhitespaceIfAny(in);
readIntText(data.data_and_stack, in);
data.virt *= PAGE_SIZE;
data.resident *= PAGE_SIZE;
data.shared *= PAGE_SIZE;
data.code *= PAGE_SIZE;
data.data_and_stack *= PAGE_SIZE;
size_t page_size = static_cast<size_t>(::getPageSize());
data.virt *= page_size;
data.resident *= page_size;
data.shared *= page_size;
data.code *= page_size;
data.data_and_stack *= page_size;
return data;
}
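A minimal standalone sketch of the same idea, assuming Linux and POSIX sysconf: the counters in /proc/self/statm are reported in pages and are scaled by the runtime page size instead of a hard-coded 4096.

#include <cstddef>
#include <fstream>
#include <iostream>
#include <unistd.h>

int main()
{
    /// /proc/self/statm reports sizes in pages: total, resident, shared, ...
    std::ifstream statm("/proc/self/statm");
    size_t virt = 0, resident = 0, shared = 0;
    statm >> virt >> resident >> shared;

    size_t page_size = static_cast<size_t>(sysconf(_SC_PAGESIZE));
    std::cout << "virt: " << virt * page_size << " bytes\n";
    std::cout << "resident: " << resident * page_size << " bytes\n";
    std::cout << "shared: " << shared * page_size << " bytes\n";
}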

View File

@ -1,5 +1,6 @@
#pragma once
#include <common/getPageSize.h>
#include <Common/Exception.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/UTF8Helpers.h>
@ -37,7 +38,7 @@ struct StringSearcherBase
{
#ifdef __SSE2__
static constexpr auto n = sizeof(__m128i);
const int page_size = getpagesize();
const int page_size = ::getPageSize();
bool pageSafe(const void * const ptr) const
{

View File

@ -21,6 +21,9 @@
#include <sys/types.h>
#include <dirent.h>
#include <common/errnoToString.h>
namespace DB
{
@ -246,7 +249,7 @@ static void enablePerfEvent(int event_fd)
{
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
"Can't enable perf event with file descriptor {}: '{}' ({})",
event_fd, strerror(errno), errno);
event_fd, errnoToString(errno), errno);
}
}
@ -256,7 +259,7 @@ static void disablePerfEvent(int event_fd)
{
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
"Can't disable perf event with file descriptor {}: '{}' ({})",
event_fd, strerror(errno), errno);
event_fd, errnoToString(errno), errno);
}
}
@ -266,7 +269,7 @@ static void releasePerfEvent(int event_fd)
{
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
"Can't close perf event file descriptor {}: {} ({})",
event_fd, strerror(errno), errno);
event_fd, errnoToString(errno), errno);
}
}
@ -284,7 +287,7 @@ static bool validatePerfEventDescriptor(int & fd)
{
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
"Error while checking availability of event descriptor {}: {} ({})",
fd, strerror(errno), errno);
fd, errnoToString(errno), errno);
disablePerfEvent(fd);
releasePerfEvent(fd);
@ -391,7 +394,7 @@ bool PerfEventsCounters::processThreadLocalChanges(const std::string & needed_ev
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
"Failed to open perf event {} (event_type={}, event_config={}): "
"'{}' ({})", event_info.settings_name, event_info.event_type,
event_info.event_config, strerror(errno), errno);
event_info.event_config, errnoToString(errno), errno);
}
}
@ -477,7 +480,7 @@ void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile
{
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
"Can't read event value from file descriptor {}: '{}' ({})",
fd, strerror(errno), errno);
fd, errnoToString(errno), errno);
current_values[i] = {};
}
}

View File

@ -2,11 +2,14 @@
#include <Common/ThreadProfileEvents.h>
#include <Common/QueryProfiler.h>
#include <Common/ThreadStatus.h>
#include <common/errnoToString.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Poco/Logger.h>
#include <common/getThreadId.h>
#include <signal.h>
namespace DB
{
@ -21,6 +24,11 @@ namespace ErrorCodes
thread_local ThreadStatus * current_thread = nullptr;
thread_local ThreadStatus * main_thread = nullptr;
#if !defined(SANITIZER) && !defined(ARCADIA_BUILD)
alignas(4096) static thread_local char alt_stack[4096];
static thread_local bool has_alt_stack = false;
#endif
ThreadStatus::ThreadStatus()
: thread_id{getThreadId()}
@ -34,6 +42,46 @@ ThreadStatus::ThreadStatus()
/// NOTE: It is important not to do any non-trivial actions (like updating ProfileEvents or logging) before ThreadStatus is created
/// Otherwise it could lead to SIGSEGV due to current_thread dereferencing
/// Set an alternative signal stack to provide diagnostics for stack overflow errors,
/// if one is not already installed for the current thread.
/// Sanitizers use more stack and are also incompatible with an alternative stack by default (they set up and rely on their own).
#if !defined(SANITIZER) && !defined(ARCADIA_BUILD)
if (!has_alt_stack)
{
/// Don't repeat tries even if not installed successfully.
has_alt_stack = true;
/// We have to call 'sigaltstack' before first 'sigaction'. (It does not work other way, for unknown reason).
stack_t altstack_description{};
altstack_description.ss_sp = alt_stack;
altstack_description.ss_flags = 0;
altstack_description.ss_size = sizeof(alt_stack);
if (0 != sigaltstack(&altstack_description, nullptr))
{
LOG_WARNING(log, "Cannot set alternative signal stack for thread, {}", errnoToString(errno));
}
else
{
/// Obtain existing sigaction and modify it by adding a flag.
struct sigaction action{};
if (0 != sigaction(SIGSEGV, nullptr, &action))
{
LOG_WARNING(log, "Cannot obtain previous signal action to set alternative signal stack for thread, {}", errnoToString(errno));
}
else if (!(action.sa_flags & SA_ONSTACK))
{
action.sa_flags |= SA_ONSTACK;
if (0 != sigaction(SIGSEGV, &action, nullptr))
{
LOG_WARNING(log, "Cannot set action with alternative signal stack for thread, {}", errnoToString(errno));
}
}
}
}
#endif
}
ThreadStatus::~ThreadStatus()
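A minimal standalone sketch of the alternative-signal-stack setup in the constructor above, using only POSIX calls; the 64 KiB stack size and the lack of a per-thread stack are simplifications for the sketch, not the values used in the patch.

#include <signal.h>
#include <cstdio>

/// Illustrative size; the real patch uses a 4096-byte thread-local buffer.
alignas(4096) static char alt_stack[1 << 16];

int main()
{
    /// 'sigaltstack' has to be called before 'sigaction' takes effect on this stack.
    stack_t altstack_description{};
    altstack_description.ss_sp = alt_stack;
    altstack_description.ss_flags = 0;
    altstack_description.ss_size = sizeof(alt_stack);

    if (0 != sigaltstack(&altstack_description, nullptr))
    {
        std::perror("sigaltstack");
        return 1;
    }

    /// Obtain the existing SIGSEGV action and re-register it with SA_ONSTACK,
    /// so the handler runs on the alternative stack after a stack overflow.
    struct sigaction action{};
    if (0 != sigaction(SIGSEGV, nullptr, &action))
    {
        std::perror("sigaction (read)");
        return 1;
    }
    if (!(action.sa_flags & SA_ONSTACK))
    {
        action.sa_flags |= SA_ONSTACK;
        if (0 != sigaction(SIGSEGV, &action, nullptr))
        {
            std::perror("sigaction (write)");
            return 1;
        }
    }

    std::puts("alternative signal stack installed");
    return 0;
}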

View File

@ -28,23 +28,28 @@ struct UInt128
UInt64 low;
UInt64 high;
/// TODO: Make this constexpr. Currently the type is used in unions,
/// and a union cannot contain a member with a non-trivial constructor.
/// The constructor therefore must not be user-provided, but the compiler cannot make a defaulted
/// constructor constexpr while members low and high are uninitialized, and if we default-member-initialize
/// them the constructor becomes non-trivial.
UInt128() = default;
explicit UInt128(const UInt64 low_, const UInt64 high_) : low(low_), high(high_) {}
explicit constexpr UInt128(const UInt64 low_, const UInt64 high_) : low(low_), high(high_) { }
/// We need Int128 to UInt128 conversion or AccurateComparison will call greaterOp<Int128, UInt64> instead of greaterOp<Int128, UInt128>
explicit UInt128(const Int128 rhs) : low(rhs), high(rhs >> 64) {}
explicit UInt128(const Int64 rhs) : low(rhs), high() {}
explicit UInt128(const Int32 rhs) : low(rhs), high() {}
explicit UInt128(const Int16 rhs) : low(rhs), high() {}
explicit UInt128(const Int8 rhs) : low(rhs), high() {}
explicit UInt128(const UInt8 rhs) : low(rhs), high() {}
explicit UInt128(const UInt16 rhs) : low(rhs), high() {}
explicit UInt128(const UInt32 rhs) : low(rhs), high() {}
explicit UInt128(const UInt64 rhs) : low(rhs), high() {}
explicit UInt128(const Float32 rhs) : low(rhs), high() {}
explicit UInt128(const Float64 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const Int128 rhs) : low(rhs), high(rhs >> 64) {}
explicit constexpr UInt128(const Int64 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const Int32 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const Int16 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const Int8 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const UInt8 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const UInt16 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const UInt32 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const UInt64 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const Float32 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const Float64 rhs) : low(rhs), high() {}
auto tuple() const { return std::tie(high, low); }
constexpr auto tuple() const { return std::tie(high, low); }
String toHexString() const
{
@ -53,31 +58,31 @@ struct UInt128
return res;
}
bool inline operator== (const UInt128 rhs) const { return tuple() == rhs.tuple(); }
bool inline operator!= (const UInt128 rhs) const { return tuple() != rhs.tuple(); }
bool inline operator< (const UInt128 rhs) const { return tuple() < rhs.tuple(); }
bool inline operator<= (const UInt128 rhs) const { return tuple() <= rhs.tuple(); }
bool inline operator> (const UInt128 rhs) const { return tuple() > rhs.tuple(); }
bool inline operator>= (const UInt128 rhs) const { return tuple() >= rhs.tuple(); }
constexpr bool operator== (const UInt128 rhs) const { return tuple() == rhs.tuple(); }
constexpr bool operator!= (const UInt128 rhs) const { return tuple() != rhs.tuple(); }
constexpr bool operator< (const UInt128 rhs) const { return tuple() < rhs.tuple(); }
constexpr bool operator<= (const UInt128 rhs) const { return tuple() <= rhs.tuple(); }
constexpr bool operator> (const UInt128 rhs) const { return tuple() > rhs.tuple(); }
constexpr bool operator>= (const UInt128 rhs) const { return tuple() >= rhs.tuple(); }
bool inline operator == (const Int128 rhs) const { return *this == UInt128(rhs, rhs >> 64) && rhs >= 0; }
bool inline operator != (const Int128 rhs) const { return *this != UInt128(rhs, rhs >> 64) || rhs < 0; }
bool inline operator >= (const Int128 rhs) const { return *this >= UInt128(rhs, rhs >> 64) || rhs < 0; }
bool inline operator > (const Int128 rhs) const { return *this > UInt128(rhs, rhs >> 64) || rhs < 0; }
bool inline operator <= (const Int128 rhs) const { return *this <= UInt128(rhs, rhs >> 64) && rhs >= 0; }
bool inline operator < (const Int128 rhs) const { return *this < UInt128(rhs, rhs >> 64) && rhs >= 0; }
constexpr bool operator == (const Int128 rhs) const { return *this == UInt128(rhs, rhs >> 64) && rhs >= 0; }
constexpr bool operator != (const Int128 rhs) const { return *this != UInt128(rhs, rhs >> 64) || rhs < 0; }
constexpr bool operator >= (const Int128 rhs) const { return *this >= UInt128(rhs, rhs >> 64) || rhs < 0; }
constexpr bool operator > (const Int128 rhs) const { return *this > UInt128(rhs, rhs >> 64) || rhs < 0; }
constexpr bool operator <= (const Int128 rhs) const { return *this <= UInt128(rhs, rhs >> 64) && rhs >= 0; }
constexpr bool operator < (const Int128 rhs) const { return *this < UInt128(rhs, rhs >> 64) && rhs >= 0; }
bool inline operator > (const Int256 rhs) const { return (rhs < 0) || ((Int256(high) << 64) + low) > rhs; }
bool inline operator > (const UInt256 rhs) const { return ((UInt256(high) << 64) + low) > rhs; }
bool inline operator < (const Int256 rhs) const { return (rhs >= 0) && ((Int256(high) << 64) + low) < rhs; }
bool inline operator < (const UInt256 rhs) const { return ((UInt256(high) << 64) + low) < rhs; }
constexpr bool operator > (const Int256 rhs) const { return (rhs < 0) || ((Int256(high) << 64) + low) > rhs; }
constexpr bool operator > (const UInt256 rhs) const { return ((UInt256(high) << 64) + low) > rhs; }
constexpr bool operator < (const Int256 rhs) const { return (rhs >= 0) && ((Int256(high) << 64) + low) < rhs; }
constexpr bool operator < (const UInt256 rhs) const { return ((UInt256(high) << 64) + low) < rhs; }
template <typename T> bool inline operator== (const T rhs) const { return *this == UInt128(rhs); }
template <typename T> bool inline operator!= (const T rhs) const { return *this != UInt128(rhs); }
template <typename T> bool inline operator>= (const T rhs) const { return *this >= UInt128(rhs); }
template <typename T> bool inline operator> (const T rhs) const { return *this > UInt128(rhs); }
template <typename T> bool inline operator<= (const T rhs) const { return *this <= UInt128(rhs); }
template <typename T> bool inline operator< (const T rhs) const { return *this < UInt128(rhs); }
template <typename T> constexpr bool operator== (const T rhs) const { return *this == UInt128(rhs); }
template <typename T> constexpr bool operator!= (const T rhs) const { return *this != UInt128(rhs); }
template <typename T> constexpr bool operator>= (const T rhs) const { return *this >= UInt128(rhs); }
template <typename T> constexpr bool operator> (const T rhs) const { return *this > UInt128(rhs); }
template <typename T> constexpr bool operator<= (const T rhs) const { return *this <= UInt128(rhs); }
template <typename T> constexpr bool operator< (const T rhs) const { return *this < UInt128(rhs); }
template <typename T> explicit operator T() const
{
@ -91,15 +96,15 @@ struct UInt128
#pragma GCC diagnostic pop
#endif
UInt128 & operator= (const UInt64 rhs) { low = rhs; high = 0; return *this; }
constexpr UInt128 & operator= (const UInt64 rhs) { low = rhs; high = 0; return *this; }
};
template <typename T> bool inline operator == (T a, const UInt128 b) { return b.operator==(a); }
template <typename T> bool inline operator != (T a, const UInt128 b) { return b.operator!=(a); }
template <typename T> bool inline operator >= (T a, const UInt128 b) { return b <= a; }
template <typename T> bool inline operator > (T a, const UInt128 b) { return b < a; }
template <typename T> bool inline operator <= (T a, const UInt128 b) { return b >= a; }
template <typename T> bool inline operator < (T a, const UInt128 b) { return b > a; }
template <typename T> constexpr bool operator == (T a, const UInt128 b) { return b.operator==(a); }
template <typename T> constexpr bool operator != (T a, const UInt128 b) { return b.operator!=(a); }
template <typename T> constexpr bool operator >= (T a, const UInt128 b) { return b <= a; }
template <typename T> constexpr bool operator > (T a, const UInt128 b) { return b < a; }
template <typename T> constexpr bool operator <= (T a, const UInt128 b) { return b >= a; }
template <typename T> constexpr bool operator < (T a, const UInt128 b) { return b > a; }
template <> inline constexpr bool IsNumber<UInt128> = true;
template <> struct TypeName<UInt128> { static constexpr const char * get() { return "UInt128"; } };
@ -246,4 +251,42 @@ template <> struct hash<DB::UInt128>
}
};
template<>
class numeric_limits<DB::UInt128>
{
public:
static constexpr bool is_specialized = true;
static constexpr bool is_signed = ::is_signed<DB::UInt128>::value;
static constexpr bool is_integer = ::is_integer<DB::UInt128>::value;
static constexpr bool is_exact = true;
static constexpr bool has_infinity = false;
static constexpr bool has_quiet_NaN = false;
static constexpr bool has_signaling_NaN = false;
static constexpr std::float_denorm_style has_denorm = std::denorm_absent;
static constexpr bool has_denorm_loss = false;
static constexpr std::float_round_style round_style = std::round_toward_zero;
static constexpr bool is_iec559 = false;
static constexpr bool is_bounded = true;
static constexpr bool is_modulo = true;
static constexpr int digits = std::numeric_limits<UInt64>::digits * 2;
static constexpr int digits10 = digits * 0.30103 /*std::log10(2)*/;
static constexpr int max_digits10 = 0;
static constexpr int radix = 2;
static constexpr int min_exponent = 0;
static constexpr int min_exponent10 = 0;
static constexpr int max_exponent = 0;
static constexpr int max_exponent10 = 0;
static constexpr bool traps = true;
static constexpr bool tinyness_before = false;
static constexpr DB::UInt128 min() noexcept { return DB::UInt128(0, 0); }
static constexpr DB::UInt128 max() noexcept
{
return DB::UInt128(std::numeric_limits<UInt64>::max(), std::numeric_limits<UInt64>::max());
}
static constexpr DB::UInt128 lowest() noexcept { return min(); }
};
}
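A minimal standalone sketch of the same technique, specializing std::numeric_limits for a toy 128-bit wrapper (the Toy128 type is purely illustrative); digits10 is derived from digits with the same log10(2) approximation as above.

#include <cstdint>
#include <iostream>
#include <limits>

struct Toy128
{
    uint64_t low;
    uint64_t high;
    constexpr Toy128(uint64_t low_, uint64_t high_) : low(low_), high(high_) {}
};

namespace std
{
template <>
class numeric_limits<Toy128>
{
public:
    static constexpr bool is_specialized = true;
    static constexpr bool is_integer = true;
    static constexpr bool is_signed = false;
    /// Twice the binary digits of one 64-bit half.
    static constexpr int digits = numeric_limits<uint64_t>::digits * 2;
    /// digits10 ~= digits * log10(2).
    static constexpr int digits10 = digits * 30103 / 100000;
    static constexpr Toy128 min() noexcept { return Toy128(0, 0); }
    static constexpr Toy128 max() noexcept
    {
        return Toy128(numeric_limits<uint64_t>::max(), numeric_limits<uint64_t>::max());
    }
};
}

int main()
{
    std::cout << std::numeric_limits<Toy128>::digits << " binary digits, "
              << std::numeric_limits<Toy128>::digits10 << " decimal digits\n";
}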

View File

@ -798,6 +798,21 @@ void TestKeeperStorage::clearDeadWatches(int64_t session_id)
if (watches_for_path.empty())
watches.erase(watch);
}
auto list_watch = list_watches.find(watch_path);
if (list_watch != list_watches.end())
{
auto & list_watches_for_path = list_watch->second;
for (auto w_it = list_watches_for_path.begin(); w_it != list_watches_for_path.end();)
{
if (w_it->session_id == session_id)
w_it = list_watches_for_path.erase(w_it);
else
++w_it;
}
if (list_watches_for_path.empty())
list_watches.erase(list_watch);
}
}
sessions_and_watchers.erase(watches_it);
}
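A minimal standalone sketch of the cleanup pattern added above: remove every watch owned by the finished session, then drop paths whose watch list became empty. Container and type names are illustrative, not the TestKeeperStorage types.

#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

struct Watch { int64_t session_id; };

int main()
{
    std::unordered_map<std::string, std::vector<Watch>> list_watches =
    {
        {"/a", {{1}, {2}}},
        {"/b", {{2}}},
    };

    int64_t dead_session = 2;
    for (auto it = list_watches.begin(); it != list_watches.end();)
    {
        auto & watches_for_path = it->second;
        for (auto w_it = watches_for_path.begin(); w_it != watches_for_path.end();)
        {
            if (w_it->session_id == dead_session)
                w_it = watches_for_path.erase(w_it);
            else
                ++w_it;
        }
        /// Erase the path entirely once its last watch is gone.
        if (watches_for_path.empty())
            it = list_watches.erase(it);
        else
            ++it;
    }

    for (const auto & [path, watches] : list_watches)
        std::cout << path << " keeps " << watches.size() << " watch(es)\n";
}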

View File

@ -5,6 +5,7 @@
#cmakedefine01 USE_RE2_ST
#cmakedefine01 USE_SSL
#cmakedefine01 USE_HDFS
#cmakedefine01 USE_INTERNAL_HDFS3_LIBRARY
#cmakedefine01 USE_AWS_S3
#cmakedefine01 USE_BROTLI
#cmakedefine01 USE_UNWIND

View File

@ -7,7 +7,7 @@ ADDINCL (
GLOBAL clickhouse/src
contrib/libs/libcpuid
contrib/libs/libunwind/include
GLOBAL contrib/restricted/ryu
GLOBAL contrib/restricted/dragonbox
)
PEERDIR(
@ -18,7 +18,7 @@ PEERDIR(
contrib/libs/openssl
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/re2
contrib/restricted/ryu
contrib/restricted/dragonbox
)
INCLUDE(${ARCADIA_ROOT}/clickhouse/cmake/yandex/ya.make.versions.inc)

View File

@ -6,7 +6,7 @@ ADDINCL (
GLOBAL clickhouse/src
contrib/libs/libcpuid
contrib/libs/libunwind/include
GLOBAL contrib/restricted/ryu
GLOBAL contrib/restricted/dragonbox
)
PEERDIR(
@ -17,7 +17,7 @@ PEERDIR(
contrib/libs/openssl
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/re2
contrib/restricted/ryu
contrib/restricted/dragonbox
)
INCLUDE(${ARCADIA_ROOT}/clickhouse/cmake/yandex/ya.make.versions.inc)

View File

@ -515,11 +515,32 @@ inline bool NO_SANITIZE_UNDEFINED convertNumeric(From value, To & result)
return true;
}
/// Note that NaNs doesn't compare equal to anything, but they are still in range of any Float type.
if (isNaN(value) && std::is_floating_point_v<To>)
if constexpr (std::is_floating_point_v<From> && std::is_floating_point_v<To>)
{
result = value;
return true;
/// Note that NaNs don't compare equal to anything, but they are still in range of any Float type.
if (isNaN(value))
{
result = value;
return true;
}
if (value == std::numeric_limits<From>::infinity())
{
result = std::numeric_limits<To>::infinity();
return true;
}
if (value == -std::numeric_limits<From>::infinity())
{
result = -std::numeric_limits<To>::infinity();
return true;
}
}
if (accurate::greaterOp(value, std::numeric_limits<To>::max())
|| accurate::greaterOp(std::numeric_limits<To>::lowest(), value))
{
return false;
}
result = static_cast<To>(value);

View File

@ -6,6 +6,7 @@
#include <common/arithmeticOverflow.h>
#include <limits>
#include <type_traits>
namespace DB
@ -206,23 +207,32 @@ inline typename DecimalType::NativeType getFractionalPart(const DecimalType & de
}
/// Decimal to integer/float conversion
template <typename To, typename DecimalType>
To convertTo(const DecimalType & decimal, size_t scale)
template <typename To, typename DecimalType, typename ReturnType>
ReturnType convertToImpl(const DecimalType & decimal, size_t scale, To & result)
{
using NativeT = typename DecimalType::NativeType;
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
if constexpr (std::is_floating_point_v<To>)
{
return static_cast<To>(decimal.value) / static_cast<To>(scaleMultiplier<NativeT>(scale));
result = static_cast<To>(decimal.value) / static_cast<To>(scaleMultiplier<NativeT>(scale));
}
else if constexpr (is_integer_v<To> && (sizeof(To) >= sizeof(NativeT)))
{
NativeT whole = getWholePart(decimal, scale);
if constexpr (is_unsigned_v<To>)
{
if (whole < 0)
throw Exception("Convert overflow", ErrorCodes::DECIMAL_OVERFLOW);
return static_cast<To>(whole);
{
if constexpr (throw_exception)
throw Exception("Convert overflow", ErrorCodes::DECIMAL_OVERFLOW);
else
return ReturnType(true);
}
}
result = static_cast<To>(whole);
}
else if constexpr (is_integer_v<To>)
{
@ -235,9 +245,34 @@ To convertTo(const DecimalType & decimal, size_t scale)
static const constexpr CastTo max_to = std::numeric_limits<ToNativeT>::max();
if (whole < min_to || whole > max_to)
throw Exception("Convert overflow", ErrorCodes::DECIMAL_OVERFLOW);
return static_cast<CastTo>(whole);
{
if constexpr (throw_exception)
throw Exception("Convert overflow", ErrorCodes::DECIMAL_OVERFLOW);
else
return ReturnType(true);
}
result = static_cast<CastTo>(whole);
}
return ReturnType(true);
}
template <typename To, typename DecimalType>
To convertTo(const DecimalType & decimal, size_t scale)
{
To result;
convertToImpl<To, DecimalType, void>(decimal, scale, result);
return result;
}
template <typename To, typename DecimalType>
bool tryConvertTo(const DecimalType & decimal, size_t scale, To & result)
{
return convertToImpl<To, DecimalType, bool>(decimal, scale, result);
}
template <bool is_multiply, bool is_division, typename T, typename U, template <typename> typename DecimalType>
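A minimal standalone sketch of the pattern used in convertToImpl above: one shared implementation whose error handling, throw or return a status, is selected at compile time through the ReturnType parameter. The toy conversion only range-checks a 64-bit integer against int, and its success/failure convention is this sketch's own, not the Decimal logic.

#include <iostream>
#include <limits>
#include <stdexcept>
#include <type_traits>

template <typename ReturnType>
ReturnType convertToImpl(long long value, int & result)
{
    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;

    if (value > std::numeric_limits<int>::max() || value < std::numeric_limits<int>::min())
    {
        if constexpr (throw_exception)
            throw std::overflow_error("Convert overflow");
        else
            return ReturnType(false); /// false means the conversion failed in this sketch
    }

    result = static_cast<int>(value);
    return ReturnType(true);
}

/// Throwing flavour: ReturnType = void.
int convertTo(long long value)
{
    int result = 0;
    convertToImpl<void>(value, result);
    return result;
}

/// Non-throwing flavour: ReturnType = bool.
bool tryConvertTo(long long value, int & result)
{
    return convertToImpl<bool>(value, result);
}

int main()
{
    int r = 0;
    std::cout << tryConvertTo(1LL << 40, r) << '\n'; // 0: overflow reported without throwing
    std::cout << convertTo(123) << '\n';             // 123
}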

View File

@ -17,6 +17,63 @@ namespace ErrorCodes
extern const int DECIMAL_OVERFLOW;
}
inline Field getBinaryValue(UInt8 type, ReadBuffer & buf)
{
switch (type)
{
case Field::Types::Null: {
return DB::Field();
}
case Field::Types::UInt64: {
UInt64 value;
DB::readVarUInt(value, buf);
return value;
}
case Field::Types::UInt128: {
UInt128 value;
DB::readBinary(value, buf);
return value;
}
case Field::Types::Int64: {
Int64 value;
DB::readVarInt(value, buf);
return value;
}
case Field::Types::Float64: {
Float64 value;
DB::readFloatBinary(value, buf);
return value;
}
case Field::Types::String: {
std::string value;
DB::readStringBinary(value, buf);
return value;
}
case Field::Types::Array: {
Array value;
DB::readBinary(value, buf);
return value;
}
case Field::Types::Tuple: {
Tuple value;
DB::readBinary(value, buf);
return value;
}
case Field::Types::Map: {
Map value;
DB::readBinary(value, buf);
return value;
}
case Field::Types::AggregateFunctionState: {
AggregateFunctionStateData value;
DB::readStringBinary(value.name, buf);
DB::readStringBinary(value.data, buf);
return value;
}
}
return DB::Field();
}
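A minimal standalone sketch of the shape of this refactor, assuming std::variant and iostreams in place of Field and the ClickHouse buffers: one helper decodes a value from its type tag (as getBinaryValue does above), and writing dispatches a visitor over the stored alternative (as FieldVisitorWriteBinary does).

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <type_traits>
#include <variant>

using Value = std::variant<uint64_t, double, std::string>;

enum class Tag : uint8_t { UInt64 = 0, Float64 = 1, String = 2 };

/// Writing: one visitor handles every alternative.
void writeValue(const Value & v, std::ostream & out)
{
    out.put(static_cast<char>(v.index())); /// the type tag comes first
    std::visit([&out](const auto & x)
    {
        if constexpr (std::is_same_v<std::decay_t<decltype(x)>, std::string>)
            out << x.size() << ' ' << x;
        else
            out << x << ' ';
    }, v);
}

/// Reading: one helper decodes any value from its tag.
Value readValue(Tag tag, std::istream & in)
{
    switch (tag)
    {
        case Tag::UInt64: { uint64_t x; in >> x; in.get(); return x; }
        case Tag::Float64: { double x; in >> x; in.get(); return x; }
        case Tag::String:
        {
            size_t size; in >> size; in.get();
            std::string x(size, '\0');
            in.read(x.data(), static_cast<std::streamsize>(size));
            return x;
        }
    }
    return uint64_t{0};
}

int main()
{
    std::stringstream buf;
    writeValue(Value{uint64_t{42}}, buf);
    writeValue(Value{std::string("map")}, buf);

    for (int i = 0; i < 2; ++i)
    {
        Tag tag = static_cast<Tag>(buf.get());
        std::visit([](const auto & x) { std::cout << x << '\n'; }, readValue(tag, buf));
    }
}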
void readBinary(Array & x, ReadBuffer & buf)
{
size_t size;
@ -25,73 +82,7 @@ void readBinary(Array & x, ReadBuffer & buf)
DB::readBinary(size, buf);
for (size_t index = 0; index < size; ++index)
{
switch (type)
{
case Field::Types::Null:
{
x.push_back(DB::Field());
break;
}
case Field::Types::UInt64:
{
UInt64 value;
DB::readVarUInt(value, buf);
x.push_back(value);
break;
}
case Field::Types::UInt128:
{
UInt128 value;
DB::readBinary(value, buf);
x.push_back(value);
break;
}
case Field::Types::Int64:
{
Int64 value;
DB::readVarInt(value, buf);
x.push_back(value);
break;
}
case Field::Types::Float64:
{
Float64 value;
DB::readFloatBinary(value, buf);
x.push_back(value);
break;
}
case Field::Types::String:
{
std::string value;
DB::readStringBinary(value, buf);
x.push_back(value);
break;
}
case Field::Types::Array:
{
Array value;
DB::readBinary(value, buf);
x.push_back(value);
break;
}
case Field::Types::Tuple:
{
Tuple value;
DB::readBinary(value, buf);
x.push_back(value);
break;
}
case Field::Types::AggregateFunctionState:
{
AggregateFunctionStateData value;
DB::readStringBinary(value.name, buf);
DB::readStringBinary(value.data, buf);
x.push_back(value);
break;
}
}
}
x.push_back(getBinaryValue(type, buf));
}
void writeBinary(const Array & x, WriteBuffer & buf)
@ -104,53 +95,7 @@ void writeBinary(const Array & x, WriteBuffer & buf)
DB::writeBinary(size, buf);
for (const auto & elem : x)
{
switch (type)
{
case Field::Types::Null: break;
case Field::Types::UInt64:
{
DB::writeVarUInt(get<UInt64>(elem), buf);
break;
}
case Field::Types::UInt128:
{
DB::writeBinary(get<UInt128>(elem), buf);
break;
}
case Field::Types::Int64:
{
DB::writeVarInt(get<Int64>(elem), buf);
break;
}
case Field::Types::Float64:
{
DB::writeFloatBinary(get<Float64>(elem), buf);
break;
}
case Field::Types::String:
{
DB::writeStringBinary(get<std::string>(elem), buf);
break;
}
case Field::Types::Array:
{
DB::writeBinary(get<Array>(elem), buf);
break;
}
case Field::Types::Tuple:
{
DB::writeBinary(get<Tuple>(elem), buf);
break;
}
case Field::Types::AggregateFunctionState:
{
DB::writeStringBinary(elem.get<AggregateFunctionStateData>().name, buf);
DB::writeStringBinary(elem.get<AggregateFunctionStateData>().data, buf);
break;
}
}
}
Field::dispatch([&buf] (const auto & value) { DB::FieldVisitorWriteBinary()(value, buf); }, elem);
}
void writeText(const Array & x, WriteBuffer & buf)
@ -168,93 +113,7 @@ void readBinary(Tuple & x, ReadBuffer & buf)
{
UInt8 type;
DB::readBinary(type, buf);
switch (type)
{
case Field::Types::Null:
{
x.push_back(DB::Field());
break;
}
case Field::Types::UInt64:
{
UInt64 value;
DB::readVarUInt(value, buf);
x.push_back(value);
break;
}
case Field::Types::UInt128:
{
UInt128 value;
DB::readBinary(value, buf);
x.push_back(value);
break;
}
case Field::Types::Int64:
{
Int64 value;
DB::readVarInt(value, buf);
x.push_back(value);
break;
}
case Field::Types::Int128:
{
Int64 value;
DB::readVarInt(value, buf);
x.push_back(value);
break;
}
case Field::Types::Float64:
{
Float64 value;
DB::readFloatBinary(value, buf);
x.push_back(value);
break;
}
case Field::Types::String:
{
std::string value;
DB::readStringBinary(value, buf);
x.push_back(value);
break;
}
case Field::Types::UInt256:
{
UInt256 value;
DB::readBinary(value, buf);
x.push_back(value);
break;
}
case Field::Types::Int256:
{
Int256 value;
DB::readBinary(value, buf);
x.push_back(value);
break;
}
case Field::Types::Array:
{
Array value;
DB::readBinary(value, buf);
x.push_back(value);
break;
}
case Field::Types::Tuple:
{
Tuple value;
DB::readBinary(value, buf);
x.push_back(value);
break;
}
case Field::Types::AggregateFunctionState:
{
AggregateFunctionStateData value;
DB::readStringBinary(value.name, buf);
DB::readStringBinary(value.data, buf);
x.push_back(value);
break;
}
}
x.push_back(getBinaryValue(type, buf));
}
}
@ -267,67 +126,7 @@ void writeBinary(const Tuple & x, WriteBuffer & buf)
{
const UInt8 type = elem.getType();
DB::writeBinary(type, buf);
switch (type)
{
case Field::Types::Null: break;
case Field::Types::UInt64:
{
DB::writeVarUInt(get<UInt64>(elem), buf);
break;
}
case Field::Types::UInt128:
{
DB::writeBinary(get<UInt128>(elem), buf);
break;
}
case Field::Types::Int64:
{
DB::writeVarInt(get<Int64>(elem), buf);
break;
}
case Field::Types::Int128:
{
DB::writeVarInt(get<Int64>(elem), buf);
break;
}
case Field::Types::Float64:
{
DB::writeFloatBinary(get<Float64>(elem), buf);
break;
}
case Field::Types::String:
{
DB::writeStringBinary(get<std::string>(elem), buf);
break;
}
case Field::Types::UInt256:
{
DB::writeBinary(get<UInt256>(elem), buf);
break;
}
case Field::Types::Int256:
{
DB::writeBinary(get<Int256>(elem), buf);
break;
}
case Field::Types::Array:
{
DB::writeBinary(get<Array>(elem), buf);
break;
}
case Field::Types::Tuple:
{
DB::writeBinary(get<Tuple>(elem), buf);
break;
}
case Field::Types::AggregateFunctionState:
{
DB::writeStringBinary(elem.get<AggregateFunctionStateData>().name, buf);
DB::writeStringBinary(elem.get<AggregateFunctionStateData>().data, buf);
break;
}
}
Field::dispatch([&buf] (const auto & value) { DB::FieldVisitorWriteBinary()(value, buf); }, elem);
}
}
@ -336,6 +135,37 @@ void writeText(const Tuple & x, WriteBuffer & buf)
writeFieldText(DB::Field(x), buf);
}
void readBinary(Map & x, ReadBuffer & buf)
{
size_t size;
DB::readBinary(size, buf);
for (size_t index = 0; index < size; ++index)
{
UInt8 type;
DB::readBinary(type, buf);
x.push_back(getBinaryValue(type, buf));
}
}
void writeBinary(const Map & x, WriteBuffer & buf)
{
const size_t size = x.size();
DB::writeBinary(size, buf);
for (const auto & elem : x)
{
const UInt8 type = elem.getType();
DB::writeBinary(type, buf);
Field::dispatch([&buf] (const auto & value) { DB::FieldVisitorWriteBinary()(value, buf); }, elem);
}
}
void writeText(const Map & x, WriteBuffer & buf)
{
writeFieldText(DB::Field(x), buf);
}
template <typename T>
void readQuoted(DecimalField<T> & x, ReadBuffer & buf)
{
@ -530,6 +360,30 @@ Field Field::restoreFromDump(const std::string_view & dump_)
return tuple;
}
prefix = std::string_view{"Map_("};
if (dump.starts_with(prefix))
{
std::string_view tail = dump.substr(prefix.length());
trimLeft(tail);
Map map;
while (tail != ")")
{
size_t separator = tail.find_first_of(",)");
if (separator == std::string_view::npos)
show_error();
bool comma = (tail[separator] == ',');
std::string_view element = tail.substr(0, separator);
tail.remove_prefix(separator);
if (comma)
tail.remove_prefix(1);
trimLeft(tail);
if (!comma && tail != ")")
show_error();
map.push_back(Field::restoreFromDump(element));
}
return map;
}
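A minimal standalone sketch of the dump-parsing loop above, keeping the elements as strings instead of restoring them into Fields; the sketch splits naively on ',' and ')' and does not handle separators inside quoted values.

#include <cstddef>
#include <iostream>
#include <string>
#include <string_view>
#include <vector>

int main()
{
    std::string_view dump = "Map_('key1', 1, 'key2', 2)";
    std::string_view prefix = "Map_(";

    std::vector<std::string> elements;
    if (dump.substr(0, prefix.size()) == prefix)
    {
        std::string_view tail = dump.substr(prefix.size());
        while (tail != ")")
        {
            /// Find the end of the next element: either a comma or the closing parenthesis.
            size_t separator = tail.find_first_of(",)");
            if (separator == std::string_view::npos)
                break; /// malformed dump

            bool comma = (tail[separator] == ',');
            std::string_view element = tail.substr(0, separator);
            tail.remove_prefix(separator);
            if (comma)
                tail.remove_prefix(1);
            while (!tail.empty() && tail.front() == ' ')
                tail.remove_prefix(1);

            elements.emplace_back(element);
        }
    }

    for (const auto & e : elements)
        std::cout << e << '\n';
}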
prefix = std::string_view{"AggregateFunctionState_("};
if (dump.starts_with(prefix))
{

View File

@ -51,6 +51,9 @@ struct X : public FieldVector \
DEFINE_FIELD_VECTOR(Array);
DEFINE_FIELD_VECTOR(Tuple);
/// An array with the following structure: [(key1, value1), (key2, value2), ...]
DEFINE_FIELD_VECTOR(Map);
#undef DEFINE_FIELD_VECTOR
struct AggregateFunctionStateData
@ -206,6 +209,7 @@ template <> struct NearestFieldTypeImpl<std::string_view> { using Type = String;
template <> struct NearestFieldTypeImpl<String> { using Type = String; };
template <> struct NearestFieldTypeImpl<Array> { using Type = Array; };
template <> struct NearestFieldTypeImpl<Tuple> { using Type = Tuple; };
template <> struct NearestFieldTypeImpl<Map> { using Type = Map; };
template <> struct NearestFieldTypeImpl<bool> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<Null> { using Type = Null; };
@ -259,6 +263,7 @@ public:
Decimal256 = 23,
UInt256 = 24,
Int256 = 25,
Map = 26,
};
static const int MIN_NON_POD = 16;
@ -276,6 +281,7 @@ public:
case String: return "String";
case Array: return "Array";
case Tuple: return "Tuple";
case Map: return "Map";
case Decimal32: return "Decimal32";
case Decimal64: return "Decimal64";
case Decimal128: return "Decimal128";
@ -464,6 +470,7 @@ public:
case Types::String: return get<String>() < rhs.get<String>();
case Types::Array: return get<Array>() < rhs.get<Array>();
case Types::Tuple: return get<Tuple>() < rhs.get<Tuple>();
case Types::Map: return get<Map>() < rhs.get<Map>();
case Types::Decimal32: return get<DecimalField<Decimal32>>() < rhs.get<DecimalField<Decimal32>>();
case Types::Decimal64: return get<DecimalField<Decimal64>>() < rhs.get<DecimalField<Decimal64>>();
case Types::Decimal128: return get<DecimalField<Decimal128>>() < rhs.get<DecimalField<Decimal128>>();
@ -499,6 +506,7 @@ public:
case Types::String: return get<String>() <= rhs.get<String>();
case Types::Array: return get<Array>() <= rhs.get<Array>();
case Types::Tuple: return get<Tuple>() <= rhs.get<Tuple>();
case Types::Map: return get<Map>() <= rhs.get<Map>();
case Types::Decimal32: return get<DecimalField<Decimal32>>() <= rhs.get<DecimalField<Decimal32>>();
case Types::Decimal64: return get<DecimalField<Decimal64>>() <= rhs.get<DecimalField<Decimal64>>();
case Types::Decimal128: return get<DecimalField<Decimal128>>() <= rhs.get<DecimalField<Decimal128>>();
@ -536,6 +544,7 @@ public:
case Types::String: return get<String>() == rhs.get<String>();
case Types::Array: return get<Array>() == rhs.get<Array>();
case Types::Tuple: return get<Tuple>() == rhs.get<Tuple>();
case Types::Map: return get<Map>() == rhs.get<Map>();
case Types::UInt128: return get<UInt128>() == rhs.get<UInt128>();
case Types::Int128: return get<Int128>() == rhs.get<Int128>();
case Types::Decimal32: return get<DecimalField<Decimal32>>() == rhs.get<DecimalField<Decimal32>>();
@ -575,6 +584,7 @@ public:
case Types::String: return f(field.template get<String>());
case Types::Array: return f(field.template get<Array>());
case Types::Tuple: return f(field.template get<Tuple>());
case Types::Map: return f(field.template get<Map>());
case Types::Decimal32: return f(field.template get<DecimalField<Decimal32>>());
case Types::Decimal64: return f(field.template get<DecimalField<Decimal64>>());
case Types::Decimal128: return f(field.template get<DecimalField<Decimal128>>());
@ -600,7 +610,7 @@ public:
private:
std::aligned_union_t<DBMS_MIN_FIELD_SIZE - sizeof(Types::Which),
Null, UInt64, UInt128, Int64, Int128, Float64, String, Array, Tuple,
Null, UInt64, UInt128, Int64, Int128, Float64, String, Array, Tuple, Map,
DecimalField<Decimal32>, DecimalField<Decimal64>, DecimalField<Decimal128>, DecimalField<Decimal256>,
AggregateFunctionStateData,
UInt256, Int256
@ -699,6 +709,9 @@ private:
case Types::Tuple:
destroy<Tuple>();
break;
case Types::Map:
destroy<Map>();
break;
case Types::AggregateFunctionState:
destroy<AggregateFunctionStateData>();
break;
@ -729,6 +742,7 @@ template <> struct Field::TypeToEnum<Float64> { static const Types::Which value
template <> struct Field::TypeToEnum<String> { static const Types::Which value = Types::String; };
template <> struct Field::TypeToEnum<Array> { static const Types::Which value = Types::Array; };
template <> struct Field::TypeToEnum<Tuple> { static const Types::Which value = Types::Tuple; };
template <> struct Field::TypeToEnum<Map> { static const Types::Which value = Types::Map; };
template <> struct Field::TypeToEnum<DecimalField<Decimal32>>{ static const Types::Which value = Types::Decimal32; };
template <> struct Field::TypeToEnum<DecimalField<Decimal64>>{ static const Types::Which value = Types::Decimal64; };
template <> struct Field::TypeToEnum<DecimalField<Decimal128>>{ static const Types::Which value = Types::Decimal128; };
@ -747,6 +761,7 @@ template <> struct Field::EnumToType<Field::Types::Float64> { using Type = Float
template <> struct Field::EnumToType<Field::Types::String> { using Type = String; };
template <> struct Field::EnumToType<Field::Types::Array> { using Type = Array; };
template <> struct Field::EnumToType<Field::Types::Tuple> { using Type = Tuple; };
template <> struct Field::EnumToType<Field::Types::Map> { using Type = Map; };
template <> struct Field::EnumToType<Field::Types::Decimal32> { using Type = DecimalField<Decimal32>; };
template <> struct Field::EnumToType<Field::Types::Decimal64> { using Type = DecimalField<Decimal64>; };
template <> struct Field::EnumToType<Field::Types::Decimal128> { using Type = DecimalField<Decimal128>; };
@ -814,6 +829,7 @@ T safeGet(Field & field)
template <> struct TypeName<Array> { static std::string get() { return "Array"; } };
template <> struct TypeName<Tuple> { static std::string get() { return "Tuple"; } };
template <> struct TypeName<Map> { static std::string get() { return "Map"; } };
template <> struct TypeName<AggregateFunctionStateData> { static std::string get() { return "AggregateFunctionState"; } };
template <typename T>
@ -900,6 +916,12 @@ void writeBinary(const Tuple & x, WriteBuffer & buf);
void writeText(const Tuple & x, WriteBuffer & buf);
void readBinary(Map & x, ReadBuffer & buf);
[[noreturn]] inline void readText(Map &, ReadBuffer &) { throw Exception("Cannot read Map.", ErrorCodes::NOT_IMPLEMENTED); }
[[noreturn]] inline void readQuoted(Map &, ReadBuffer &) { throw Exception("Cannot read Map.", ErrorCodes::NOT_IMPLEMENTED); }
void writeBinary(const Map & x, WriteBuffer & buf);
void writeText(const Map & x, WriteBuffer & buf);
[[noreturn]] inline void writeQuoted(const Map &, WriteBuffer &) { throw Exception("Cannot write Map quoted.", ErrorCodes::NOT_IMPLEMENTED); }
__attribute__ ((noreturn)) inline void writeText(const AggregateFunctionStateData &, WriteBuffer &)
{

View File

@ -16,10 +16,8 @@ namespace ErrorCodes
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
}
IMPLEMENT_SETTINGS_TRAITS(SettingsTraits, LIST_OF_SETTINGS)
/** Set the settings from the profile (in the server configuration, many settings can be listed in one profile).
* The profile can also be set using the `set` functions, like the `profile` setting.
*/

View File

@ -65,6 +65,7 @@ class IColumn;
M(UInt64, distributed_connections_pool_size, DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE, "Maximum number of connections with one remote server in the pool.", 0) \
M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \
M(UInt64, s3_min_upload_part_size, 512*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, s3_max_single_part_upload_size, 64*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
M(Bool, use_uncompressed_cache, true, "Whether to use the cache of uncompressed blocks.", 0) \
@ -165,7 +166,7 @@ class IColumn;
\
M(UInt64, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 0 - disabled.", 0) \
M(Milliseconds, insert_quorum_timeout, 600000, "", 0) \
M(Bool, insert_quorum_parallel, false, "For quorum INSERT queries - enable to make parallel inserts without linearizability", 0) \
M(Bool, insert_quorum_parallel, true, "For quorum INSERT queries - enable to make parallel inserts without linearizability", 0) \
M(UInt64, select_sequential_consistency, 0, "For SELECT queries from the replicated table, throw an exception if the replica does not have a chunk written with the quorum; do not read the parts that have not yet been written with the quorum.", 0) \
M(UInt64, table_function_remote_max_addresses, 1000, "The maximum number of different shards and the maximum number of replicas of one shard in the `remote` function.", 0) \
M(Milliseconds, read_backoff_min_latency_ms, 1000, "Setting to reduce the number of threads in case of slow reads. Pay attention only to reads that took at least that much time.", 0) \
@ -238,6 +239,8 @@ class IColumn;
* Almost all limits apply to each stream individually. \
*/ \
\
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \
M(UInt64, max_rows_to_read, 0, "Limit on read rows from the most 'deep' sources. That is, only in the deepest subquery. When reading from a remote server, it is only checked on a remote server.", 0) \
M(UInt64, max_bytes_to_read, 0, "Limit on read bytes (after decompression) from the most 'deep' sources. That is, only in the deepest subquery. When reading from a remote server, it is only checked on a remote server.", 0) \
M(OverflowMode, read_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
@ -400,6 +403,8 @@ class IColumn;
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
M(Bool, allow_experimental_map_type, false, "Allow data type Map", 0) \
\
M(Bool, use_antlr_parser, false, "Parse incoming queries using ANTLR-generated parser", 0) \
\

View File

@ -30,7 +30,6 @@ struct SortCursorImpl
ColumnRawPtrs all_columns;
SortDescription desc;
size_t sort_columns_size = 0;
size_t pos = 0;
size_t rows = 0;
/** Determines order if comparing columns are equal.
@ -49,15 +48,20 @@ struct SortCursorImpl
/** Is there at least one column with Collator. */
bool has_collation = false;
/** We could use SortCursorImpl in the case when columns aren't sorted
* but we have their sorted permutation.
*/
IColumn::Permutation * permutation = nullptr;
SortCursorImpl() {}
SortCursorImpl(const Block & block, const SortDescription & desc_, size_t order_ = 0)
SortCursorImpl(const Block & block, const SortDescription & desc_, size_t order_ = 0, IColumn::Permutation * perm = nullptr)
: desc(desc_), sort_columns_size(desc.size()), order(order_), need_collation(desc.size())
{
reset(block);
reset(block, perm);
}
SortCursorImpl(const Columns & columns, const SortDescription & desc_, size_t order_ = 0)
SortCursorImpl(const Columns & columns, const SortDescription & desc_, size_t order_ = 0, IColumn::Permutation * perm = nullptr)
: desc(desc_), sort_columns_size(desc.size()), order(order_), need_collation(desc.size())
{
for (auto & column_desc : desc)
@ -66,19 +70,19 @@ struct SortCursorImpl
throw Exception("SortDescription should contain column position if SortCursor was used without header.",
ErrorCodes::LOGICAL_ERROR);
}
reset(columns, {});
reset(columns, {}, perm);
}
bool empty() const { return rows == 0; }
/// Set the cursor to the beginning of the new block.
void reset(const Block & block)
void reset(const Block & block, IColumn::Permutation * perm = nullptr)
{
reset(block.getColumns(), block);
reset(block.getColumns(), block, perm);
}
/// Set the cursor to the beginning of the new block.
void reset(const Columns & columns, const Block & block)
void reset(const Columns & columns, const Block & block, IColumn::Permutation * perm = nullptr)
{
all_columns.clear();
sort_columns.clear();
@ -96,18 +100,33 @@ struct SortCursorImpl
: column_desc.column_number;
sort_columns.push_back(columns[column_number].get());
need_collation[j] = desc[j].collator != nullptr && sort_columns.back()->isCollationSupported(); /// TODO Nullable(String)
need_collation[j] = desc[j].collator != nullptr && sort_columns.back()->isCollationSupported();
has_collation |= need_collation[j];
}
pos = 0;
rows = all_columns[0]->size();
permutation = perm;
}
size_t getRow() const
{
if (permutation)
return (*permutation)[pos];
return pos;
}
/// We need a possibility to change pos (see MergeJoin).
size_t & getPosRef() { return pos; }
bool isFirst() const { return pos == 0; }
bool isLast() const { return pos + 1 >= rows; }
bool isValid() const { return pos < rows; }
void next() { ++pos; }
/// Prevent using pos instead of getRow()
private:
size_t pos;
};
using SortCursorImpls = std::vector<SortCursorImpl>;
@ -127,7 +146,7 @@ struct SortCursorHelper
bool ALWAYS_INLINE greater(const SortCursorHelper & rhs) const
{
return derived().greaterAt(rhs.derived(), impl->pos, rhs.impl->pos);
return derived().greaterAt(rhs.derived(), impl->getRow(), rhs.impl->getRow());
}
/// Inverted so that the priority queue elements are removed in ascending order.
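A minimal standalone sketch of the cursor change above, with plain vectors standing in for the block and columns: when a sorted permutation is supplied, getRow() maps the position through it, so the cursor walks the data in sorted order without the columns themselves being sorted.

#include <cstddef>
#include <iostream>
#include <vector>

struct Cursor
{
    const std::vector<int> * column = nullptr;
    const std::vector<size_t> * permutation = nullptr; /// sorted order of rows, optional
    size_t pos = 0;

    size_t getRow() const { return permutation ? (*permutation)[pos] : pos; }
    bool isValid() const { return pos < column->size(); }
    void next() { ++pos; }
    int value() const { return (*column)[getRow()]; }
};

int main()
{
    /// Unsorted data plus the permutation that visits it in ascending order.
    std::vector<int> data = {30, 10, 20};
    std::vector<size_t> perm = {1, 2, 0};

    Cursor cursor{&data, &perm};
    for (; cursor.isValid(); cursor.next())
        std::cout << cursor.value() << '\n'; /// prints 10 20 30
}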

View File

@ -56,6 +56,7 @@ enum class TypeIndex
Function,
AggregateFunction,
LowCardinality,
Map,
};
#if !__clang__
#pragma GCC diagnostic pop
@ -267,6 +268,7 @@ inline constexpr const char * getTypeName(TypeIndex idx)
case TypeIndex::Function: return "Function";
case TypeIndex::AggregateFunction: return "AggregateFunction";
case TypeIndex::LowCardinality: return "LowCardinality";
case TypeIndex::Map: return "Map";
}
__builtin_unreachable();

Some files were not shown because too many files have changed in this diff.