Merge branch 'master' into bobrik-parallel-randes

This commit is contained in:
Nikolai Kochetov 2020-07-20 19:05:28 +03:00
commit d6583698a9
39 changed files with 1057 additions and 848 deletions

View File

@ -30,7 +30,7 @@ struct StringRef
constexpr StringRef(const CharT * data_, size_t size_) : data(reinterpret_cast<const char *>(data_)), size(size_) {}
StringRef(const std::string & s) : data(s.data()), size(s.size()) {}
constexpr StringRef(const std::string_view & s) : data(s.data()), size(s.size()) {}
constexpr explicit StringRef(const std::string_view & s) : data(s.data()), size(s.size()) {}
constexpr StringRef(const char * data_) : StringRef(std::string_view{data_}) {}
constexpr StringRef() = default;

View File

@ -1,17 +1,8 @@
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include/simdjson/jsonparser.h")
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include/simdjson.h")
message (WARNING "submodule contrib/simdjson is missing. to fix try run: \n git submodule update --init --recursive")
return()
endif ()
if (NOT HAVE_SSE42)
message (WARNING "submodule contrib/simdjson requires support of SSE4.2 instructions")
return()
elseif (NOT HAVE_PCLMULQDQ)
message (WARNING "submodule contrib/simdjson requires support of PCLMULQDQ instructions")
return()
endif ()
option (USE_SIMDJSON "Use simdjson" ON)
set (SIMDJSON_LIBRARY "simdjson")
message(STATUS "Using simdjson=${USE_SIMDJSON}: ${SIMDJSON_LIBRARY}")
message(STATUS "Using simdjson=${USE_SIMDJSON}")

2
contrib/simdjson vendored

@ -1 +1 @@
Subproject commit 560f0742cc0895d00d78359dbdeb82064a24adb8
Subproject commit 1e4aa116e5a39e4ba23b9a93e6c7f048c5105b20

View File

@ -1,14 +1,6 @@
set(SIMDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include")
set(SIMDJSON_SRC_DIR "${SIMDJSON_INCLUDE_DIR}/../src")
set(SIMDJSON_SRC
${SIMDJSON_SRC_DIR}/document.cpp
${SIMDJSON_SRC_DIR}/error.cpp
${SIMDJSON_SRC_DIR}/implementation.cpp
${SIMDJSON_SRC_DIR}/jsonioutil.cpp
${SIMDJSON_SRC_DIR}/jsonminifier.cpp
${SIMDJSON_SRC_DIR}/stage1_find_marks.cpp
${SIMDJSON_SRC_DIR}/stage2_build_tape.cpp
)
set(SIMDJSON_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/src")
set(SIMDJSON_SRC ${SIMDJSON_SRC_DIR}/simdjson.cpp)
add_library(${SIMDJSON_LIBRARY} ${SIMDJSON_SRC})
target_include_directories(${SIMDJSON_LIBRARY} SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}")
add_library(simdjson ${SIMDJSON_SRC})
target_include_directories(simdjson SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}")

View File

@ -67,19 +67,7 @@ function watchdog
sleep 1
done
./clickhouse-client --query "select elapsed, query from system.processes" ||:
killall clickhouse-server ||:
for x in {1..10}
do
if ! pgrep -f clickhouse-server
then
break
fi
sleep 1
done
killall -9 clickhouse-server clickhouse-client ||:
killall -9 clickhouse-client ||:
}
function fuzz
@ -100,8 +88,18 @@ function fuzz
|| fuzzer_exit_code=$?
echo "Fuzzer exit code is $fuzzer_exit_code"
./clickhouse-client --query "select elapsed, query from system.processes" ||:
kill -9 $server_pid ||:
killall clickhouse-server ||:
for x in {1..10}
do
if ! pgrep -f clickhouse-server
then
break
fi
sleep 1
done
killall -9 clickhouse-server ||:
if [ "$fuzzer_exit_code" == "143" ]
then

View File

@ -1,25 +0,0 @@
{
"checkYo": false,
"excludeFiles": [],
"fileExtensions": [],
"format": "auto",
"ignoreTags": [
"code",
"kbd",
"object",
"samp",
"script",
"style",
"var"
],
"maxRequests": 2,
"lang": "en,ru",
"report": ["console"],
"dictionary": [
"(C|c)lick(H|h)ouse",
"CatBoost",
"(Ш|ш)ард(ы|ов|а|у|е|ам|ирование|ированы|ах)?",
"логир(ование|уются|ования)?",
"конфиг(а|е|ом|у)"
]
}

View File

@ -1,6 +1,6 @@
---
toc_priority: 71
toc_title: Source Code
toc_title: Source Code Browser
---
# Browse ClickHouse Source Code {#browse-clickhouse-source-code}

View File

@ -1,6 +1,6 @@
---
toc_priority: 67
toc_title: How to Build ClickHouse on Linux for AARCH64 (ARM64)
toc_title: Build on Linux for AARCH64 (ARM64)
---
# How to Build ClickHouse on Linux for AARCH64 (ARM64) Architecture {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture}
@ -9,7 +9,7 @@ This is for the case when you have Linux machine and want to use it to build `cl
The cross-build for AARCH64 is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}
## Install Clang-8 {#install-clang-8}
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example, in Ubuntu Bionic you can use the following commands:
@ -20,7 +20,7 @@ sudo apt-get update
sudo apt-get install clang-8
```
# Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
``` bash
cd ClickHouse
@ -29,7 +29,7 @@ wget 'https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build-aarch64/cmake/toolchain/linux-aarch64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}
## Build ClickHouse {#build-clickhouse}
``` bash
cd ClickHouse

View File

@ -1,6 +1,6 @@
---
toc_priority: 66
toc_title: How to Build ClickHouse on Linux for Mac OS X
toc_title: Build on Linux for Mac OS X
---
# How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x}
@ -9,7 +9,7 @@ This is for the case when you have Linux machine and want to use it to build `cl
The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}
## Install Clang-8 {#install-clang-8}
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example the commands for Bionic are like:
@ -19,7 +19,7 @@ sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8
sudo apt-get install clang-8
```
# Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
Lets remember the path where we install `cctools` as ${CCTOOLS}
@ -47,7 +47,7 @@ mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}
## Build ClickHouse {#build-clickhouse}
``` bash
cd ClickHouse

View File

@ -1,6 +1,6 @@
---
toc_priority: 65
toc_title: How to Build ClickHouse on Mac OS X
toc_title: Build on Mac OS X
---
# How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
@ -45,14 +45,12 @@ $ cd ..
## Caveats {#caveats}
If you intend to run clickhouse-server, make sure to increase the systems maxfiles variable.
If you intend to run `clickhouse-server`, make sure to increase the systems maxfiles variable.
!!! info "Note"
Youll need to use sudo.
To do so, create the following file:
/Library/LaunchDaemons/limit.maxfiles.plist:
To do so, create the `/Library/LaunchDaemons/limit.maxfiles.plist` file with the following content:
``` xml
<?xml version="1.0" encoding="UTF-8"?>

View File

@ -35,6 +35,7 @@ toc_title: Third-Party Libraries Used
| poco | [Boost Software License - Version 1.0](https://github.com/ClickHouse-Extras/poco/blob/fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f/LICENSE) |
| protobuf | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/protobuf/blob/12735370922a35f03999afff478e1c6d7aa917a4/LICENSE) |
| re2 | [BSD 3-Clause License](https://github.com/google/re2/blob/7cf8b88e8f70f97fd4926b56aa87e7f53b2717e0/LICENSE) |
| sentry-native | [MIT License](https://github.com/getsentry/sentry-native/blob/master/LICENSE) |
| UnixODBC | [LGPL v2.1](https://github.com/ClickHouse-Extras/UnixODBC/tree/b0ad30f7f6289c12b76f04bfb9d466374bb32168) |
| zlib-ng | [Zlib License](https://github.com/ClickHouse-Extras/zlib-ng/blob/develop/LICENSE.md) |
| zstd | [BSD 3-Clause License](https://github.com/facebook/zstd/blob/dev/LICENSE) |

View File

@ -1,6 +1,6 @@
---
toc_priority: 68
toc_title: How to Write C++ Code
toc_title: C++ Guide
---
# How to Write C++ Code {#how-to-write-c-code}

View File

@ -1,6 +1,6 @@
---
toc_priority: 69
toc_title: How to Run ClickHouse Tests
toc_title: Testing
---
# ClickHouse Testing {#clickhouse-testing}
@ -25,12 +25,7 @@ Tests should use (create, drop, etc) only tables in `test` database that is assu
If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`.
Some tests are marked with `zookeeper`, `shard` or `long` in their names.
`zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that
requires server to listen `127.0.0.*`; `distributed` or `global` have the same
meaning. `long` is for tests that run slightly longer that one second. You can
disable these groups of tests using `--no-zookeeper`, `--no-shard` and
`--no-long` options, respectively.
Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that requires server to listen `127.0.0.*`; `distributed` or `global` have the same meaning. `long` is for tests that run slightly longer that one second. You can disable these groups of tests using `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively.
## Known Bugs {#known-bugs}
@ -153,11 +148,11 @@ Motivation:
Normally we release and run all tests on a single variant of ClickHouse build. But there are alternative build variants that are not thoroughly tested. Examples:
- build on FreeBSD;
- build on Debian with libraries from system packages;
- build with shared linking of libraries;
- build on AArch64 platform;
- build on PowerPc platform.
- build on FreeBSD
- build on Debian with libraries from system packages
- build with shared linking of libraries
- build on AArch64 platform
- build on PowerPc platform
For example, build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. But this is really needed by Debian maintainers. For this reason we at least have to support this variant of build. Another example: shared linking is a common source of trouble, but it is needed for some enthusiasts.
@ -177,22 +172,22 @@ For production builds, gcc is used (it still generates slightly more efficient c
## Sanitizers {#sanitizers}
**Address sanitizer**.
### Address sanitizer
We run functional and integration tests under ASan on per-commit basis.
**Valgrind (Memcheck)**.
### Valgrind (Memcheck)
We run functional tests under Valgrind overnight. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse).
**Undefined behaviour sanitizer.**
### Undefined behaviour sanitizer
We run functional and integration tests under ASan on per-commit basis.
**Thread sanitizer**.
### Thread sanitizer
We run functional tests under TSan on per-commit basis. We still dont run integration tests under TSan on per-commit basis.
**Memory sanitizer**.
### Memory sanitizer
Currently we still dont use MSan.
**Debug allocator.**
### Debug allocator
Debug version of `jemalloc` is used for debug build.
## Fuzzing {#fuzzing}
@ -227,7 +222,7 @@ If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of t
## Code Style {#code-style}
Code style rules are described [here](https://clickhouse.tech/docs/en/development/style/).
Code style rules are described [here](style.md).
To check for some common style violations, you can use `utils/check-style` script.

View File

@ -1,13 +1,14 @@
---
toc_folder_title: Example Datasets
toc_priority: 12
toc_priority: 15
toc_title: Introduction
---
# Example Datasets {#example-datasets}
This section describes how to obtain example datasets and import them into ClickHouse.
For some datasets example queries are also available.
This section describes how to obtain example datasets and import them into ClickHouse. For some datasets example queries are also available.
The list of documented datasets:
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)

View File

@ -37,6 +37,7 @@ The queries are executed as a read-only user. It implies some limitations:
- INSERT queries are not allowed
The following settings are also enforced:
- [max\_result\_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
- [max\_result\_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
- [result\_overflow\_mode=break](../operations/settings/query_complexity/#result-overflow-mode)

View File

@ -397,7 +397,6 @@ The cache is shared for the server and memory is allocated as needed. The cache
``` xml
<mark_cache_size>5368709120</mark_cache_size>
```
## max\_server\_memory\_usage {#max_server_memory_usage}
Limits total RAM usage by the ClickHouse server. You can specify it only for the default profile.
@ -411,11 +410,37 @@ Default value: `0`.
**Additional Info**
On hosts with low RAM and swap, you possibly need setting `max_server_memory_usage_to_ram_ratio > 1`.
The default `max_server_memory_usage` value is calculated as `memory_amount * max_server_memory_usage_to_ram_ratio`.
**See also**
- [max\_memory\_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage)
- [max_server_memory_usage_to_ram_ratio](#max_server_memory_usage_to_ram_ratio)
## max_server_memory_usage_to_ram_ratio {#max_server_memory_usage_to_ram_ratio}
Defines the fraction of total physical RAM amount, available to the Clickhouse server. If the server tries to utilize more, the memory is cut down to the appropriate amount.
Possible values:
- Positive double.
- 0 — The Clickhouse server can use all available RAM.
Default value: `0`.
**Usage**
On hosts with low RAM and swap, you possibly need setting `max_server_memory_usage_to_ram_ratio` larger than 1.
**Example**
``` xml
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
```
**See Also**
- [max_server_memory_usage](#max_server_memory_usage)
## max\_concurrent\_queries {#max-concurrent-queries}

View File

@ -585,6 +585,31 @@ Possible values:
Default value: 0.
## network_compression_method {#network_compression_method}
Sets the method of data compression that is used for communication between servers and between server and [clickhouse-client](../../interfaces/cli.md).
Possible values:
- `LZ4` — sets LZ4 compression method.
- `ZSTD` — sets ZSTD compression method.
Default value: `LZ4`.
**See Also**
- [network_zstd_compression_level](#network_zstd_compression_level)
## network_zstd_compression_level {#network_zstd_compression_level}
Adjusts the level of ZSTD compression. Used only when [network_compression_method](#network_compression_method) is set to `ZSTD`.
Possible values:
- Positive integer from 1 to 15.
Default value: `1`.
## log\_queries {#settings-log-queries}
Setting up query logging.

View File

@ -16,6 +16,8 @@ By default `clickhouse-local` does not have access to data on the same host, but
!!! warning "Warning"
It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error.
For temporary data an unique temporary data directory is created by default. If you want to override this behavior the data directory can be explicitly specified with the `-- --path` option.
## Usage {#usage}
Basic usage:
@ -40,6 +42,7 @@ Arguments:
Also there are arguments for each ClickHouse configuration variable which are more commonly used instead of `--config-file`.
## Examples {#examples}
``` bash

View File

@ -111,4 +111,43 @@ SELECT alphaTokens('abca1abc')
└─────────────────────────┘
```
## extractAllGroups(text, regexp) {#extractallgroups}
Extracts all groups from non-overlapping substrings matched by a regular expression.
**Syntax**
``` sql
extractAllGroups(text, regexp)
```
**Parameters**
- `text` — [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned values**
- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`).
- If there is no matching group, returns an empty array.
Type: [Array](../data-types/array.md).
**Example**
Query:
``` sql
SELECT extractAllGroups('abc=123, 8="hkl"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
```
Result:
``` text
┌─extractAllGroups('abc=123, 8="hkl"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','123'],['8','"hkl"']] │
└───────────────────────────────────────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/splitting_merging_functions/) <!--hide-->

View File

@ -385,12 +385,37 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat
**Дополнительная информация**
На серверах с небольшим объёмом RAM и файла подкачки может потребоваться настройка `max_server_memory_usage_to_ram_ratio > 1`.
Значение по умолчанию для `max_server_memory_usage` рассчитывается как `memory_amount * max_server_memory_usage_to_ram_ratio`.
**См. также**
- [max_memory_usage](../settings/query-complexity.md#settings_max_memory_usage)
## max_server_memory_usage_to_ram_ratio {#max_server_memory_usage_to_ram_ratio}
Определяет долю оперативной памяти, доступную для использования сервером Clickhouse. Если сервер попытается использовать больше, предоставляемый ему объём памяти будет ограничен до расчётного значения.
Возможные значения:
- Положительное число с плавающей запятой.
- 0 — сервер Clickhouse может использовать всю оперативную память.
Значение по умолчанию: `0`.
**Использование**
На серверах с небольшим объёмом оперативной памяти и файла подкачки может потребоваться установить настройку `max_server_memory_usage_to_ram_ratio` в значение, большее 1.
**Пример**
``` xml
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
```
**См. также**
- [max_server_memory_usage](#max_server_memory_usage)
## max\_connections {#max-connections}
Максимальное количество входящих соединений.

View File

@ -520,6 +520,31 @@ ClickHouse использует этот параметр при чтении д
Значение по умолчанию: 0.
## network_compression_method {#network_compression_method}
Задает метод сжатия данных, используемый при обмене данными между серверами и при обмене между сервером и [clickhouse-client](../../interfaces/cli.md).
Возможные значения:
- `LZ4` — устанавливает метод сжатия LZ4.
- `ZSTD` — устанавливает метод сжатия ZSTD.
Значение по умолчанию: `LZ4`.
См. также:
- [network_zstd_compression_level](#network_zstd_compression_level)
## network_zstd_compression_level {#network_zstd_compression_level}
Регулирует уровень сжатия ZSTD. Используется только тогда, когда [network_compression_method](#network_compression_method) имеет значение `ZSTD`.
Возможные значения:
- Положительное целое число от 1 до 15.
Значение по умолчанию: `1`.
## log\_queries {#settings-log-queries}
Установка логирования запроса.

View File

@ -33,4 +33,42 @@ SELECT alphaTokens('abca1abc')
└─────────────────────────┘
```
## extractAllGroups(text, regexp) {#extractallgroups}
Выделяет все группы из неперекрывающихся подстрок, которые соответствуют регулярному выражению.
**Синтаксис**
``` sql
extractAllGroups(text, regexp)
```
**Параметры**
- `text` — [String](../data-types/string.md) или [FixedString](../data-types/fixedstring.md).
- `regexp` — Регулярное выражение. Константа. [String](../data-types/string.md) или [FixedString](../data-types/fixedstring.md).
**Возвращаемые значения**
- Если найдена хотя бы одна подходящая группа, функция возвращает столбец вида `Array(Array(String))`, сгруппированный по идентификатору группы (от 1 до N, где N — количество групп с захватом содержимого в `regexp`).
- Если подходящих групп не найдено, возвращает пустой массив.
Тип: [Array](../data-types/array.md).
**Пример использования**
Запрос:
``` sql
SELECT extractAllGroups('abc=123, 8="hkl"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
```
Результат:
``` text
┌─extractAllGroups('abc=123, 8="hkl"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','123'],['8','"hkl"']] │
└───────────────────────────────────────────────────────────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/splitting_merging_functions/) <!--hide-->

View File

@ -293,7 +293,7 @@ Examples of how this hierarchy is treated:
- The `MODIFY SETTING` privilege allows modifying table engine settings. It doesnt affect settings or server configuration parameters.
- The `ATTACH` operation needs the [CREATE](#grant-create) privilege.
- The `DETACH` operation needs the [DROP](#grant-drop) privilege.
- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/misc.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege.
- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/misc.md#kill-mutation-statement) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege.
### CREATE {#grant-create}
@ -312,7 +312,7 @@ Allows executing [CREATE](../../sql-reference/statements/create.md) and [ATTACH]
### DROP {#grant-drop}
Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH](../../sql-reference/statements/misc.md#detach) queries according to the following hierarchy of privileges:
Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH](../../sql-reference/statements/misc.md#detach-statement) queries according to the following hierarchy of privileges:
- `DROP`. Level:
- `DROP DATABASE`. Level: `DATABASE`

View File

@ -126,7 +126,7 @@ def adjust_markdown_html(content):
def minify_html(content):
return htmlmin.minify(content,
remove_comments=False,
remove_empty_space=True,
remove_empty_space=False,
remove_all_empty_space=False,
reduce_empty_attributes=True,
reduce_boolean_attributes=False,

View File

@ -62,8 +62,8 @@ Block CheckSortedBlockInputStream::readImpl()
else if (res > 0)
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Sort order of blocks violated for column {}, left: {}, right: {}.",
backQuoteIfNeed(elem.column_name),
"Sort order of blocks violated for column number {}, left: {}, right: {}.",
column_number,
applyVisitor(FieldVisitorDump(), (*left_col)[left_index]),
applyVisitor(FieldVisitorDump(), (*right_col)[right_index]));
}

View File

@ -89,7 +89,7 @@ endif()
target_link_libraries(clickhouse_functions PRIVATE hyperscan)
if(USE_SIMDJSON)
target_link_libraries(clickhouse_functions PRIVATE ${SIMDJSON_LIBRARY})
target_link_libraries(clickhouse_functions PRIVATE simdjson)
endif()
if(USE_RAPIDJSON)

View File

@ -1,6 +1,5 @@
#pragma once
#include <common/StringRef.h>
#include <Common/Exception.h>
#include <Core/Types.h>
@ -15,43 +14,73 @@ namespace ErrorCodes
/// It can't do anything useful and just throws an exception.
struct DummyJSONParser
{
static constexpr bool need_preallocate = false;
void preallocate(size_t) {}
class Array;
class Object;
bool parse(const StringRef &) { throw Exception{"Functions JSON* are not supported without AVX2", ErrorCodes::NOT_IMPLEMENTED}; }
class Element
{
public:
Element() {}
bool isInt64() const { return false; }
bool isUInt64() const { return false; }
bool isDouble() const { return false; }
bool isString() const { return false; }
bool isArray() const { return false; }
bool isObject() const { return false; }
bool isBool() const { return false; }
bool isNull() const { return false; }
using Iterator = std::nullptr_t;
Iterator getRoot() const { return nullptr; }
Int64 getInt64() const { return 0; }
UInt64 getUInt64() const { return 0; }
double getDouble() const { return 0; }
bool getBool() const { return false; }
std::string_view getString() const { return {}; }
Array getArray() const;
Object getObject() const;
};
static bool isInt64(const Iterator &) { return false; }
static bool isUInt64(const Iterator &) { return false; }
static bool isDouble(const Iterator &) { return false; }
static bool isString(const Iterator &) { return false; }
static bool isArray(const Iterator &) { return false; }
static bool isObject(const Iterator &) { return false; }
static bool isBool(const Iterator &) { return false; }
static bool isNull(const Iterator &) { return true; }
class Array
{
public:
class Iterator
{
public:
Element operator*() const { return {}; }
Iterator & operator ++() { return *this; }
Iterator operator ++(int) { return *this; }
friend bool operator ==(const Iterator &, const Iterator &) { return true; }
friend bool operator !=(const Iterator &, const Iterator &) { return false; }
};
static Int64 getInt64(const Iterator &) { return 0; }
static UInt64 getUInt64(const Iterator &) { return 0; }
static double getDouble(const Iterator &) { return 0; }
static bool getBool(const Iterator &) { return false; }
static StringRef getString(const Iterator &) { return {}; }
Iterator begin() const { return {}; }
Iterator end() const { return {}; }
size_t size() const { return 0; }
Element operator[](size_t) const { return {}; }
};
static size_t sizeOfArray(const Iterator &) { return 0; }
static bool firstArrayElement(Iterator &) { return false; }
static bool arrayElementByIndex(Iterator &, size_t) { return false; }
static bool nextArrayElement(Iterator &) { return false; }
class Object
{
public:
using KeyValuePair = std::pair<std::string_view, Element>;
static size_t sizeOfObject(const Iterator &) { return 0; }
static bool firstObjectMember(Iterator &) { return false; }
static bool firstObjectMember(Iterator &, StringRef &) { return false; }
static bool objectMemberByIndex(Iterator &, size_t) { return false; }
static bool objectMemberByName(Iterator &, const StringRef &) { return false; }
static bool nextObjectMember(Iterator &) { return false; }
static bool nextObjectMember(Iterator &, StringRef &) { return false; }
static bool isObjectMember(const Iterator &) { return false; }
static StringRef getKey(const Iterator &) { return {}; }
class Iterator
{
public:
KeyValuePair operator *() const { return {}; }
Iterator & operator ++() { return *this; }
Iterator operator ++(int) { return *this; }
friend bool operator ==(const Iterator &, const Iterator &) { return true; }
friend bool operator !=(const Iterator &, const Iterator &) { return false; }
};
Iterator begin() const { return {}; }
Iterator end() const { return {}; }
size_t size() const { return 0; }
KeyValuePair operator[](size_t) const { return {}; }
bool find(const std::string_view &, Element &) const { return false; }
};
bool parse(const std::string_view &, Element &) { throw Exception{"Functions JSON* are not supported", ErrorCodes::NOT_IMPLEMENTED}; }
};
}

View File

@ -4,6 +4,57 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
std::vector<FunctionJSONHelpers::Move> FunctionJSONHelpers::prepareMoves(const char * function_name, Block & block, const ColumnNumbers & arguments, size_t first_index_argument, size_t num_index_arguments)
{
std::vector<Move> moves;
moves.reserve(num_index_arguments);
for (const auto i : ext::range(first_index_argument, first_index_argument + num_index_arguments))
{
const auto & column = block.getByPosition(arguments[i]);
if (!isString(column.type) && !isInteger(column.type))
throw Exception{"The argument " + std::to_string(i + 1) + " of function " + String(function_name)
+ " should be a string specifying key or an integer specifying index, illegal type: " + column.type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
if (column.column && isColumnConst(*column.column))
{
const auto & column_const = assert_cast<const ColumnConst &>(*column.column);
if (isString(column.type))
moves.emplace_back(MoveType::ConstKey, column_const.getValue<String>());
else
moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0));
}
else
{
if (isString(column.type))
moves.emplace_back(MoveType::Key, "");
else
moves.emplace_back(MoveType::Index, 0);
}
}
return moves;
}
size_t FunctionJSONHelpers::calculateMaxSize(const ColumnString::Offsets & offsets)
{
size_t max_size = 0;
for (const auto i : ext::range(0, offsets.size()))
{
size_t size = offsets[i] - offsets[i - 1];
if (max_size < size)
max_size = size;
}
if (max_size)
--max_size;
return max_size;
}
void registerFunctionsJSON(FunctionFactory & factory)
{

File diff suppressed because it is too large Load Diff

View File

@ -6,9 +6,7 @@
#if USE_RAPIDJSON
# include <Core/Types.h>
# include <Common/Exception.h>
# include <common/StringRef.h>
# include <common/defines.h>
# include <rapidjson/document.h>
@ -19,197 +17,130 @@ namespace DB
/// It provides ability to parse JSONs using rapidjson library.
struct RapidJSONParser
{
static constexpr bool need_preallocate = false;
void preallocate(size_t) {}
class Array;
class Object;
bool parse(const StringRef & json)
{
rapidjson::MemoryStream ms(json.data, json.size);
rapidjson::EncodedInputStream<rapidjson::UTF8<>, rapidjson::MemoryStream> is(ms);
document.ParseStream(is);
return !document.HasParseError() && (ms.Tell() == json.size);
}
struct Iterator
class Element
{
public:
Iterator() {}
Iterator(const rapidjson::Document & document_) : value(&document_) {}
Iterator(const Iterator & src)
: value(src.value)
, is_object_member(src.is_object_member)
, current_in_array(src.current_in_array)
, end_of_array(src.end_of_array) {}
ALWAYS_INLINE Element() {}
ALWAYS_INLINE Element(const rapidjson::Value & value_) : ptr(&value_) {}
Iterator & operator =(const Iterator & src)
ALWAYS_INLINE bool isInt64() const { return ptr->IsInt64(); }
ALWAYS_INLINE bool isUInt64() const { return ptr->IsUint64(); }
ALWAYS_INLINE bool isDouble() const { return ptr->IsDouble(); }
ALWAYS_INLINE bool isString() const { return ptr->IsString(); }
ALWAYS_INLINE bool isArray() const { return ptr->IsArray(); }
ALWAYS_INLINE bool isObject() const { return ptr->IsObject(); }
ALWAYS_INLINE bool isBool() const { return ptr->IsBool(); }
ALWAYS_INLINE bool isNull() const { return ptr->IsNull(); }
ALWAYS_INLINE Int64 getInt64() const { return ptr->GetInt64(); }
ALWAYS_INLINE UInt64 getUInt64() const { return ptr->GetUint64(); }
ALWAYS_INLINE double getDouble() const { return ptr->GetDouble(); }
ALWAYS_INLINE bool getBool() const { return ptr->GetBool(); }
ALWAYS_INLINE std::string_view getString() const { return {ptr->GetString(), ptr->GetStringLength()}; }
Array getArray() const;
Object getObject() const;
private:
const rapidjson::Value * ptr = nullptr;
};
class Array
{
public:
class Iterator
{
value = src.value;
is_object_member = src.is_object_member;
current_in_array = src.current_in_array;
end_of_array = src.end_of_array;
return *this;
public:
ALWAYS_INLINE Iterator(const rapidjson::Value::ConstValueIterator & it_) : it(it_) {}
ALWAYS_INLINE Element operator*() const { return *it; }
ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; }
ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; }
ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return left.it == right.it; }
ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return !(left == right); }
private:
rapidjson::Value::ConstValueIterator it;
};
ALWAYS_INLINE Array(const rapidjson::Value & value_) : ptr(&value_) {}
ALWAYS_INLINE Iterator begin() const { return ptr->Begin(); }
ALWAYS_INLINE Iterator end() const { return ptr->End(); }
ALWAYS_INLINE size_t size() const { return ptr->Size(); }
ALWAYS_INLINE Element operator[](size_t index) const { return *(ptr->Begin() + index); }
private:
const rapidjson::Value * ptr = nullptr;
};
class Object
{
public:
using KeyValuePair = std::pair<std::string_view, Element>;
class Iterator
{
public:
ALWAYS_INLINE Iterator(const rapidjson::Value::ConstMemberIterator & it_) : it(it_) {}
ALWAYS_INLINE KeyValuePair operator *() const { std::string_view key{it->name.GetString(), it->name.GetStringLength()}; return {key, it->value}; }
ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; }
ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; }
ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return left.it == right.it; }
ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return !(left == right); }
private:
rapidjson::Value::ConstMemberIterator it;
};
ALWAYS_INLINE Object(const rapidjson::Value & value_) : ptr(&value_) {}
ALWAYS_INLINE Iterator begin() const { return ptr->MemberBegin(); }
ALWAYS_INLINE Iterator end() const { return ptr->MemberEnd(); }
ALWAYS_INLINE size_t size() const { return ptr->MemberCount(); }
ALWAYS_INLINE KeyValuePair operator[](size_t index) const
{
auto it = ptr->MemberBegin() + index;
std::string_view key{it->name.GetString(), it->name.GetStringLength()};
return KeyValuePair{key, it->value};
}
bool isInt64() const { return value->IsInt64(); }
bool isUInt64() const { return value->IsUint64(); }
bool isDouble() const { return value->IsDouble(); }
bool isBool() const { return value->IsBool(); }
bool isString() const { return value->IsString(); }
bool isArray() const { return value->IsArray(); }
bool isObject() const { return value->IsObject(); }
bool isNull() const { return value->IsNull(); }
Int64 getInt64() const { return value->GetInt64(); }
UInt64 getUInt64() const { return value->GetUint64(); }
double getDouble() const { return value->GetDouble(); }
bool getBool() const { return value->GetBool(); }
StringRef getString() const { return {value->GetString(), value->GetStringLength()}; }
size_t sizeOfArray() const { return value->Size(); }
bool arrayElementByIndex(size_t index)
ALWAYS_INLINE bool find(const std::string_view & key, Element & result) const
{
if (index >= value->Size())
auto it = ptr->FindMember(rapidjson::StringRef(key.data(), key.length()));
if (it == ptr->MemberEnd())
return false;
setRange(value->Begin() + index, value->End());
value = current_in_array++;
result = it->value;
return true;
}
bool nextArrayElement()
{
if (current_in_array == end_of_array)
return false;
value = current_in_array++;
return true;
}
size_t sizeOfObject() const { return value->MemberCount(); }
bool objectMemberByIndex(size_t index)
{
if (index >= value->MemberCount())
return false;
setRange(value->MemberBegin() + index, value->MemberEnd());
value = &(current_in_object++)->value;
return true;
}
bool objectMemberByIndex(size_t index, StringRef & key)
{
if (index >= value->MemberCount())
return false;
setRange(value->MemberBegin() + index, value->MemberEnd());
key = getKeyImpl(current_in_object);
value = &(current_in_object++)->value;
return true;
}
bool objectMemberByName(const StringRef & name)
{
auto it = value->FindMember(name.data);
if (it == value->MemberEnd())
return false;
setRange(it, value->MemberEnd());
value = &(current_in_object++)->value;
return true;
}
bool nextObjectMember()
{
if (current_in_object == end_of_object)
return false;
value = &(current_in_object++)->value;
return true;
}
bool nextObjectMember(StringRef & key)
{
if (current_in_object == end_of_object)
return false;
key = getKeyImpl(current_in_object);
value = &(current_in_object++)->value;
return true;
}
bool isObjectMember() const { return is_object_member; }
StringRef getKey() const
{
return getKeyImpl(current_in_object - 1);
}
private:
void setRange(rapidjson::Value::ConstValueIterator current, rapidjson::Value::ConstValueIterator end)
{
current_in_array = &*current;
end_of_array = &*end;
is_object_member = false;
}
void setRange(rapidjson::Value::ConstMemberIterator current, rapidjson::Value::ConstMemberIterator end)
{
current_in_object = &*current;
end_of_object = &*end;
is_object_member = true;
}
static StringRef getKeyImpl(const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * member)
{
const auto & name = member->name;
return {name.GetString(), name.GetStringLength()};
}
const rapidjson::Value * value = nullptr;
bool is_object_member = false;
union
{
const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * current_in_object;
const rapidjson::Value * current_in_array;
};
union
{
const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * end_of_object;
const rapidjson::Value * end_of_array;
};
const rapidjson::Value * ptr = nullptr;
};
Iterator getRoot() { return Iterator{document}; }
static bool isInt64(const Iterator & it) { return it.isInt64(); }
static bool isUInt64(const Iterator & it) { return it.isUInt64(); }
static bool isDouble(const Iterator & it) { return it.isDouble(); }
static bool isBool(const Iterator & it) { return it.isBool(); }
static bool isString(const Iterator & it) { return it.isString(); }
static bool isArray(const Iterator & it) { return it.isArray(); }
static bool isObject(const Iterator & it) { return it.isObject(); }
static bool isNull(const Iterator & it) { return it.isNull(); }
static Int64 getInt64(const Iterator & it) { return it.getInt64(); }
static UInt64 getUInt64(const Iterator & it) { return it.getUInt64(); }
static double getDouble(const Iterator & it) { return it.getDouble(); }
static bool getBool(const Iterator & it) { return it.getBool(); }
static StringRef getString(const Iterator & it) { return it.getString(); }
static size_t sizeOfArray(const Iterator & it) { return it.sizeOfArray(); }
static bool firstArrayElement(Iterator & it) { return it.arrayElementByIndex(0); }
static bool arrayElementByIndex(Iterator & it, size_t index) { return it.arrayElementByIndex(index); }
static bool nextArrayElement(Iterator & it) { return it.nextArrayElement(); }
static size_t sizeOfObject(const Iterator & it) { return it.sizeOfObject(); }
static bool firstObjectMember(Iterator & it) { return it.objectMemberByIndex(0); }
static bool firstObjectMember(Iterator & it, StringRef & first_key) { return it.objectMemberByIndex(0, first_key); }
static bool objectMemberByIndex(Iterator & it, size_t index) { return it.objectMemberByIndex(index); }
static bool objectMemberByName(Iterator & it, const StringRef & name) { return it.objectMemberByName(name); }
static bool nextObjectMember(Iterator & it) { return it.nextObjectMember(); }
static bool nextObjectMember(Iterator & it, StringRef & next_key) { return it.nextObjectMember(next_key); }
static bool isObjectMember(const Iterator & it) { return it.isObjectMember(); }
static StringRef getKey(const Iterator & it) { return it.getKey(); }
/// Parses the whole `json` buffer and stores the document root in `result`.
/// Returns false on a parse error or if bytes remain after the JSON value
/// (the `ms.Tell() != json.size()` check rejects trailing garbage).
/// NOTE(review): reuses the member `document`, so a previous parse result
/// (and any Element referring to it) is invalidated by this call.
bool parse(const std::string_view & json, Element & result)
{
    rapidjson::MemoryStream ms(json.data(), json.size());
    /// Encoded input stream over the raw memory stream; presumably used so a
    /// leading UTF-8 BOM is handled — confirm against RapidJSON stream docs.
    rapidjson::EncodedInputStream<rapidjson::UTF8<>, rapidjson::MemoryStream> is(ms);
    document.ParseStream(is);
    if (document.HasParseError() || (ms.Tell() != json.size()))
        return false;
    result = document;
    return true;
}
private:
rapidjson::Document document;
};
/// Conversions from Element to the typed wrappers, defined out-of-line
/// because Array/Object are declared after Element.
/// No type check is performed here — callers are presumably expected to
/// check isArray()/isObject() first; confirm at call sites.
inline ALWAYS_INLINE RapidJSONParser::Array RapidJSONParser::Element::getArray() const
{
    return *ptr;
}

inline ALWAYS_INLINE RapidJSONParser::Object RapidJSONParser::Element::getObject() const
{
    return *ptr;
}
}
#endif

View File

@ -7,9 +7,8 @@
#if USE_SIMDJSON
# include <Core/Types.h>
# include <Common/Exception.h>
# include <common/StringRef.h>
# include <simdjson/jsonparser.h>
# include <common/defines.h>
# include <simdjson.h>
namespace DB
@ -23,121 +22,138 @@ namespace ErrorCodes
/// It provides ability to parse JSONs using simdjson library.
struct SimdJSONParser
{
static constexpr bool need_preallocate = true;
class Array;
class Object;
void preallocate(size_t max_size)
class Element
{
if (!pj.allocate_capacity(max_size))
throw Exception{"Can not allocate memory for " + std::to_string(max_size) + " units when parsing JSON",
public:
ALWAYS_INLINE Element() {}
ALWAYS_INLINE Element(const simdjson::dom::element & element_) : element(element_) {}
ALWAYS_INLINE bool isInt64() const { return element.type() == simdjson::dom::element_type::INT64; }
ALWAYS_INLINE bool isUInt64() const { return element.type() == simdjson::dom::element_type::UINT64; }
ALWAYS_INLINE bool isDouble() const { return element.type() == simdjson::dom::element_type::DOUBLE; }
ALWAYS_INLINE bool isString() const { return element.type() == simdjson::dom::element_type::STRING; }
ALWAYS_INLINE bool isArray() const { return element.type() == simdjson::dom::element_type::ARRAY; }
ALWAYS_INLINE bool isObject() const { return element.type() == simdjson::dom::element_type::OBJECT; }
ALWAYS_INLINE bool isBool() const { return element.type() == simdjson::dom::element_type::BOOL; }
ALWAYS_INLINE bool isNull() const { return element.type() == simdjson::dom::element_type::NULL_VALUE; }
ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().first; }
ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().first; }
ALWAYS_INLINE double getDouble() const { return element.get_double().first; }
ALWAYS_INLINE bool getBool() const { return element.get_bool().first; }
ALWAYS_INLINE std::string_view getString() const { return element.get_string().first; }
ALWAYS_INLINE Array getArray() const;
ALWAYS_INLINE Object getObject() const;
private:
simdjson::dom::element element;
};
/// Lightweight view over simdjson::dom::array with an STL-like interface
/// (begin/end/size/operator[]) so callers can use range-for iteration.
class Array
{
public:
    /// Forward iterator over array elements; wraps simdjson's array iterator.
    class Iterator
    {
    public:
        ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator & it_) : it(it_) {}
        ALWAYS_INLINE Element operator *() const { return *it; }
        ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; }
        ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; }
        ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return left.it != right.it; }
        ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return !(left != right); }
    private:
        simdjson::dom::array::iterator it;
    };

    ALWAYS_INLINE Array(const simdjson::dom::array & array_) : array(array_) {}
    ALWAYS_INLINE Iterator begin() const { return array.begin(); }
    ALWAYS_INLINE Iterator end() const { return array.end(); }
    ALWAYS_INLINE size_t size() const { return array.size(); }
    /// NOTE(review): at(index)'s error code is discarded (.first) — callers are
    /// presumably expected to keep index < size(); confirm before relying on it.
    ALWAYS_INLINE Element operator[](size_t index) const { return array.at(index).first; }

private:
    simdjson::dom::array array;
};
/// Lightweight view over simdjson::dom::object exposing iteration over
/// (key, value) pairs and lookup by key.
class Object
{
public:
    using KeyValuePair = std::pair<std::string_view, Element>;

    /// Forward iterator yielding KeyValuePair; wraps simdjson's object iterator.
    class Iterator
    {
    public:
        ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator & it_) : it(it_) {}
        ALWAYS_INLINE KeyValuePair operator *() const { const auto & res = *it; return {res.key, res.value}; }
        ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; }
        ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; }
        ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return left.it != right.it; }
        ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return !(left != right); }
    private:
        simdjson::dom::object::iterator it;
    };

    ALWAYS_INLINE Object(const simdjson::dom::object & object_) : object(object_) {}
    ALWAYS_INLINE Iterator begin() const { return object.begin(); }
    ALWAYS_INLINE Iterator end() const { return object.end(); }
    ALWAYS_INLINE size_t size() const { return object.size(); }

    /// Linear in `index`: advances one step at a time from begin().
    KeyValuePair operator [](size_t index) const
    {
        Iterator it = begin();
        while (index--)
            ++it;
        return *it;
    }

    /// Looks up `key`; on success stores the value in `result` and returns true.
    /// Returns false (does not throw) when the key is absent.
    ALWAYS_INLINE bool find(const std::string_view & key, Element & result) const
    {
        auto x = object.at_key(key);
        if (x.error())
            return false;
        result = x.first;
        return true;
    }

private:
    simdjson::dom::object object;
};
/// Pre-allocates the simdjson parser's internal buffers for documents of up
/// to `max_size` bytes. Throws CANNOT_ALLOCATE_MEMORY if allocation fails.
void reserve(size_t max_size)
{
    if (parser.allocate(max_size) != simdjson::error_code::SUCCESS)
        throw Exception{"Couldn't allocate " + std::to_string(max_size) + " bytes when parsing JSON",
                        ErrorCodes::CANNOT_ALLOCATE_MEMORY};
}
bool parse(const StringRef & json) { return !json_parse(json.data, json.size, pj); }
using Iterator = simdjson::ParsedJson::Iterator;
Iterator getRoot() { return Iterator{pj}; }
static bool isInt64(const Iterator & it) { return it.is_integer(); }
static bool isUInt64(const Iterator &) { return false; /* See https://github.com/lemire/simdjson/issues/68 */ }
static bool isDouble(const Iterator & it) { return it.is_double(); }
static bool isString(const Iterator & it) { return it.is_string(); }
static bool isArray(const Iterator & it) { return it.is_array(); }
static bool isObject(const Iterator & it) { return it.is_object(); }
static bool isBool(const Iterator & it) { return it.get_type() == 't' || it.get_type() == 'f'; }
static bool isNull(const Iterator & it) { return it.is_null(); }
static Int64 getInt64(const Iterator & it) { return it.get_integer(); }
static UInt64 getUInt64(const Iterator &) { return 0; /* isUInt64() never returns true */ }
static double getDouble(const Iterator & it) { return it.get_double(); }
static bool getBool(const Iterator & it) { return it.get_type() == 't'; }
static StringRef getString(const Iterator & it) { return StringRef{it.get_string(), it.get_string_length()}; }
static size_t sizeOfArray(const Iterator & it)
bool parse(const std::string_view & json, Element & result)
{
size_t size = 0;
Iterator it2 = it;
if (it2.down())
{
do
++size;
while (it2.next());
}
return size;
}
static bool firstArrayElement(Iterator & it) { return it.down(); }
static bool arrayElementByIndex(Iterator & it, size_t index)
{
if (!it.down())
auto document = parser.parse(json.data(), json.size());
if (document.error())
return false;
while (index--)
if (!it.next())
return false;
result = document.first;
return true;
}
static bool nextArrayElement(Iterator & it) { return it.next(); }
static size_t sizeOfObject(const Iterator & it)
{
size_t size = 0;
Iterator it2 = it;
if (it2.down())
{
do
++size;
while (it2.next() && it2.next()); //-V501
}
return size;
}
static bool firstObjectMember(Iterator & it) { return it.down() && it.next(); }
static bool firstObjectMember(Iterator & it, StringRef & first_key)
{
if (!it.down())
return false;
first_key.data = it.get_string();
first_key.size = it.get_string_length();
return it.next();
}
static bool objectMemberByIndex(Iterator & it, size_t index)
{
if (!it.down())
return false;
while (index--)
if (!it.next() || !it.next()) //-V501
return false;
return it.next();
}
static bool objectMemberByName(Iterator & it, const StringRef & name) { return it.move_to_key(name.data); }
static bool nextObjectMember(Iterator & it) { return it.next() && it.next(); } //-V501
static bool nextObjectMember(Iterator & it, StringRef & next_key)
{
if (!it.next())
return false;
next_key.data = it.get_string();
next_key.size = it.get_string_length();
return it.next();
}
static bool isObjectMember(const Iterator & it) { return it.get_scope_type() == '{'; }
static StringRef getKey(const Iterator & it)
{
Iterator it2 = it;
it2.prev();
return StringRef{it2.get_string(), it2.get_string_length()};
}
private:
simdjson::ParsedJson pj;
simdjson::dom::parser parser;
};
/// Conversions from Element to the typed wrappers, defined out-of-line
/// because Array/Object are declared after Element.
/// NOTE(review): the simdjson error code is discarded (.first), so the element
/// must already be known to hold an array/object — check isArray()/isObject()
/// at the call site first.
inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const
{
    return element.get_array().first;
}

inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const
{
    return element.get_object().first;
}
}
#endif

View File

@ -89,18 +89,22 @@ inline void writeStringBinary(const std::string & s, WriteBuffer & buf)
buf.write(s.data(), s.size());
}
inline void writeStringBinary(const char * s, WriteBuffer & buf)
{
writeVarUInt(strlen(s), buf);
buf.write(s, strlen(s));
}
inline void writeStringBinary(const StringRef & s, WriteBuffer & buf)
{
writeVarUInt(s.size, buf);
buf.write(s.data, s.size);
}
inline void writeStringBinary(const char * s, WriteBuffer & buf)
{
writeStringBinary(StringRef{s}, buf);
}
inline void writeStringBinary(const std::string_view & s, WriteBuffer & buf)
{
writeStringBinary(StringRef{s}, buf);
}
template <typename T>
void writeVectorBinary(const std::vector<T> & v, WriteBuffer & buf)
@ -413,15 +417,19 @@ void writeAnyEscapedString(const char * begin, const char * end, WriteBuffer & b
}
inline void writeJSONString(const String & s, WriteBuffer & buf, const FormatSettings & settings)
inline void writeJSONString(const StringRef & s, WriteBuffer & buf, const FormatSettings & settings)
{
writeJSONString(s.data(), s.data() + s.size(), buf, settings);
writeJSONString(s.data, s.data + s.size, buf, settings);
}
inline void writeJSONString(const StringRef & ref, WriteBuffer & buf, const FormatSettings & settings)
inline void writeJSONString(const std::string_view & s, WriteBuffer & buf, const FormatSettings & settings)
{
writeJSONString(ref.data, ref.data + ref.size, buf, settings);
writeJSONString(StringRef{s}, buf, settings);
}
inline void writeJSONString(const String & s, WriteBuffer & buf, const FormatSettings & settings)
{
writeJSONString(StringRef{s}, buf, settings);
}

View File

@ -47,6 +47,10 @@ void MarkTableIdentifiersMatcher::visit(const ASTFunction & func, ASTPtr &, Data
// First argument of dictGet can be a dictionary name, perhaps with a database.
if (functionIsJoinGet(func.name) || functionIsDictGet(func.name))
{
if (func.arguments->children.empty())
{
return;
}
auto & ast = func.arguments->children.at(0);
auto opt_name = tryGetIdentifierName(ast);
if (opt_name && !data.aliases.count(*opt_name))

View File

@ -313,6 +313,18 @@ void TCPHandler::runImpl()
state.io.onException();
exception.emplace(Exception::CreateFromPocoTag{}, e);
}
// Server should die on std logic errors in debug, like with assert()
// or ErrorCodes::LOGICAL_ERROR. This helps catch these errors in
// tests.
#ifndef NDEBUG
catch (const std::logic_error & e)
{
state.io.onException();
exception.emplace(Exception::CreateFromSTDTag{}, e);
sendException(*exception, send_exception_with_stack_trace);
std::abort();
}
#endif
catch (const std::exception & e)
{
state.io.onException();

View File

@ -8,6 +8,7 @@
#include <IO/WriteHelpers.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/localBackup.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/escapeForFileName.h>
#include <common/JSON.h>
@ -521,7 +522,18 @@ void IMergeTreeDataPart::loadChecksums(bool require)
if (require)
throw Exception("No checksums.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);
bytes_on_disk = calculateTotalSizeOnDisk(volume->getDisk(), getFullRelativePath());
/// If the checksums file is not present, calculate the checksums and write them to disk.
/// Check the data while we are at it.
LOG_WARNING(storage.log, "Checksums for part {} not found. Will calculate them from data on disk.", name);
checksums = checkDataPart(shared_from_this(), false);
{
auto out = volume->getDisk()->writeFile(getFullRelativePath() + "checksums.txt.tmp", 4096);
checksums.write(*out);
}
volume->getDisk()->moveFile(getFullRelativePath() + "checksums.txt.tmp", getFullRelativePath() + "checksums.txt");
bytes_on_disk = checksums.getTotalSizeOnDisk();
}
}

View File

@ -2453,19 +2453,6 @@ static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part)
part->loadColumnsChecksumsIndexes(false, true);
part->modification_time = disk->getLastModified(full_part_path).epochTime();
/// If the checksums file is not present, calculate the checksums and write them to disk.
/// Check the data while we are at it.
if (part->checksums.empty())
{
part->checksums = checkDataPart(part, false);
{
auto out = disk->writeFile(full_part_path + "checksums.txt.tmp", 4096);
part->checksums.write(*out);
}
disk->moveFile(full_part_path + "checksums.txt.tmp", full_part_path + "checksums.txt");
}
}
MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const VolumePtr & volume, const String & relative_path) const

View File

@ -0,0 +1,39 @@
import pytest
from helpers.cluster import ClickHouseCluster

# Single-node cluster used by the test below.
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1')


@pytest.fixture(scope="module")
def start_cluster():
    """Start the cluster once per module and always shut it down afterwards."""
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()


def test_attach_without_checksums(start_cluster):
    """ATTACH PARTITION must succeed even when checksums.txt was deleted from
    the detached part: the server recalculates checksums from data on disk."""
    node1.query("CREATE TABLE test (date Date, key Int32, value String) Engine=MergeTree ORDER BY key PARTITION by date")
    node1.query("INSERT INTO test SELECT toDate('2019-10-01'), number, toString(number) FROM numbers(100)")
    assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"

    node1.query("ALTER TABLE test DETACH PARTITION '2019-10-01'")
    assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "0\n"
    assert node1.query("SELECT COUNT() FROM test") == "0\n"

    # `grep -e ".*"` fails when `find` produces no output, so this also asserts
    # that checksums.txt files exist in the detached part before we delete them.
    node1.exec_in_container(['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" | grep -e ".*" '], privileged=True, user='root')
    node1.exec_in_container(['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" -delete'], privileged=True, user='root')

    node1.query("ALTER TABLE test ATTACH PARTITION '2019-10-01'")
    assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"
    assert node1.query("SELECT COUNT() FROM test") == "100\n"

View File

@ -74,7 +74,7 @@ body[data-spy] #content {
#content pre {
background: #eee;
background: #efefef;
padding: 1rem;
}