Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-12-04 13:32:13 +00:00)

Merge branch 'ClickHouse:master' into master
Commit 5eacbac7a0

13  .github/workflows/cancel.yml (vendored, new file)
@@ -0,0 +1,13 @@
name: Cancel
on: # yamllint disable-line rule:truthy
  workflow_run:
    workflows: ["CIGithubActions"]
    types:
      - requested
jobs:
  cancel:
    runs-on: [self-hosted, style-checker]
    steps:
      - uses: styfle/cancel-workflow-action@0.9.1
        with:
          workflow_id: ${{ github.event.workflow.id }}
95  .github/workflows/main.yml (vendored)

@@ -1,4 +1,4 @@
name: Ligthweight GithubActions
name: CIGithubActions
on: # yamllint disable-line rule:truthy
  pull_request:
    types:
@@ -11,20 +11,25 @@ on: # yamllint disable-line rule:truthy
      - master
jobs:
  CheckLabels:
    runs-on: [self-hosted]
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Labels check
        run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py
        run: |
          cd $GITHUB_WORKSPACE/tests/ci
          python3 run_check.py
  DockerHubPush:
    needs: CheckLabels
    runs-on: [self-hosted]
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Images check
        run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py
        run: |
          cd $GITHUB_WORKSPACE/tests/ci
          python3 docker_images_check.py
      - name: Upload images files to artifacts
        uses: actions/upload-artifact@v2
        with:
@@ -32,7 +37,7 @@ jobs:
          path: ${{ runner.temp }}/docker_images_check/changed_images.json
  StyleCheck:
    needs: DockerHubPush
    runs-on: [self-hosted]
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Download changed images
        uses: actions/download-artifact@v2
@@ -42,12 +47,82 @@ jobs:
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Style Check
        run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py
        run: |
          cd $GITHUB_WORKSPACE/tests/ci
          python3 style_check.py
  BuilderDebDebug:
    needs: DockerHubPush
    runs-on: [self-hosted, builder]
    steps:
      - name: Download changed images
        uses: actions/download-artifact@v2
        with:
          name: changed_images
          path: ${{ runner.temp }}/build_check
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
          submodules: 'recursive'
      - name: Build
        env:
          TEMP_PATH: ${{runner.temp}}/build_check
          REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
          CACHES_PATH: ${{runner.temp}}/../ccaches
          CHECK_NAME: 'ClickHouse build check (actions)'
          BUILD_NUMBER: 7
        run: |
          sudo rm -fr $TEMP_PATH
          mkdir -p $TEMP_PATH
          cp -r $GITHUB_WORKSPACE $TEMP_PATH
          cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER
      - name: Upload build URLs to artifacts
        uses: actions/upload-artifact@v2
        with:
          name: ${{ env.BUILD_NAME }}
          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
  BuilderReport:
    needs: [BuilderDebDebug]
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
          path: ${{runner.temp}}/reports_dir
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Report Builder
        env:
          TEMP_PATH: ${{runner.temp}}/report_check
          REPORTS_PATH: ${{runner.temp}}/reports_dir
          CHECK_NAME: 'ClickHouse build check (actions)'
        run: |
          sudo rm -fr $TEMP_PATH
          mkdir -p $TEMP_PATH
          cd $GITHUB_WORKSPACE/tests/ci
          python3 build_report_check.py "$CHECK_NAME"
  FastTest:
    needs: DockerHubPush
    runs-on: [self-hosted, builder]
    steps:
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Fast Test
        env:
          TEMP_PATH: ${{runner.temp}}/fasttest
          REPO_COPY: ${{runner.temp}}/fasttest/ClickHouse
          CACHES_PATH: ${{runner.temp}}/../ccaches
        run: |
          sudo rm -fr $TEMP_PATH
          mkdir -p $TEMP_PATH
          cp -r $GITHUB_WORKSPACE $TEMP_PATH
          cd $REPO_COPY/tests/ci && python3 fast_test_check.py
  FinishCheck:
    needs: [StyleCheck, DockerHubPush, CheckLabels]
    runs-on: [self-hosted]
    needs: [StyleCheck, DockerHubPush, CheckLabels, BuilderReport, FastTest]
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Finish label
        run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py
        run: |
          cd $GITHUB_WORKSPACE/tests/ci
          python3 finish_check.py
@@ -149,8 +149,6 @@ if (ENABLE_FUZZING)
    set (ENABLE_JEMALLOC 0)
    set (ENABLE_CHECK_HEAVY_BUILDS 1)
    set (GLIBC_COMPATIBILITY OFF)
    set (ENABLE_PROTOBUF ON)
    set (USE_INTERNAL_PROTOBUF_LIBRARY ON)
endif()

# Global libraries
@@ -17,6 +17,16 @@ endif ()

add_subdirectory("${protobuf_SOURCE_DIR}/cmake" "${protobuf_BINARY_DIR}")

if (ENABLE_FUZZING)
    # `protoc` will be built with sanitizer and it could fail during ClickHouse build
    # It easily reproduces in oss-fuzz building pipeline
    # To avoid this we can try to build `protoc` without any sanitizer with option `-fno-sanitize=all`, but
    # it this case we will face with linker errors, because libcxx still will be built with sanitizer
    # So, we can simply suppress all of these failures with a combination this flag and an environment variable
    # export MSAN_OPTIONS=exit_code=0
    target_compile_options(protoc PRIVATE "-fsanitize-recover=all")
endif()

# We don't want to stop compilation on warnings in protobuf's headers.
# The following line overrides the value assigned by the command target_include_directories() in libprotobuf.cmake
set_property(TARGET libprotobuf PROPERTY INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${protobuf_SOURCE_DIR}/src")
@@ -12,19 +12,19 @@ printenv
rm -f CMakeCache.txt
read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
# Hope, that the most part of files will be in cache, so we just link new executables
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_C_COMPILER="$CC" -DCMAKE_CXX_COMPILER="$CXX" -DENABLE_CLICKHOUSE_ODBC_BRIDGE=OFF \
    -DENABLE_LIBRARIES=0 -DENABLE_SSL=1 -DUSE_INTERNAL_SSL_LIBRARY=1 -DUSE_UNWIND=ON -DENABLE_EMBEDDED_COMPILER=0 \
    -DENABLE_EXAMPLES=0 -DENABLE_UTILS=0 -DENABLE_THINLTO=0 "-DSANITIZE=$SANITIZER" \
    -DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_TCMALLOC=0 -DENABLE_JEMALLOC=0 \
    -DENABLE_CHECK_HEAVY_BUILDS=1 -DGLIBC_COMPATIBILITY=OFF "${CMAKE_FLAGS[@]}" ..
# Please, add or change flags directly in cmake
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_C_COMPILER="$CC" -DCMAKE_CXX_COMPILER="$CXX" \
    -DSANITIZE="$SANITIZER" -DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_PROTOBUF=1 -DUSE_INTERNAL_PROTOBUF_LIBRARY=1 "${CMAKE_FLAGS[@]}" ..

FUZZER_TARGETS=$(find ../src -name '*_fuzzer.cpp' -execdir basename {} .cpp ';' | tr '\n' ' ')

NUM_JOBS=$(($(nproc || grep -c ^processor /proc/cpuinfo)))

mkdir -p /output/fuzzers
for FUZZER_TARGET in $FUZZER_TARGETS
do
    # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
    ninja $NINJA_FLAGS $FUZZER_TARGET
    ninja $NINJA_FLAGS $FUZZER_TARGET -j $NUM_JOBS
    # Find this binary in build directory and strip it
    FUZZER_PATH=$(find ./src -name "$FUZZER_TARGET")
    strip --strip-unneeded "$FUZZER_PATH"
@@ -1 +0,0 @@
../../en/development/continuous-integration.md

155  docs/zh/development/continuous-integration.md (new file)
@@ -0,0 +1,155 @@
# Continuous Integration Checks {#continuous-integration-checks}
When you submit a pull request, some automated checks are run for your code by the ClickHouse [continuous integration (CI) system](https://clickhouse.com/docs/en/development/tests/#test-automation).

This happens after a repository maintainer (someone from the ClickHouse team) has screened your code and added the "can be tested" label to your pull request.

The results of the checks are listed on the GitHub pull request page, as described in the [GitHub checks documentation](https://docs.github.com/en/github/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks).

If a check is failing, you might be required to fix it. This page gives an overview of the checks you may encounter and what you can do to fix them.

If the check failure looks unrelated to your changes, it may be a transient failure or an infrastructure problem. Push an empty commit to the pull request to restart the CI checks:

```
git reset
git commit --allow-empty
git push
```

If you are not sure what to do, ask a maintainer for help.

## Merge With Master {#merge-with-master}
Verifies that the PR can be merged to master. If not, it fails with the message 'Cannot fetch mergecommit'. To fix this check, resolve the conflict as described in the [GitHub documentation](https://docs.github.com/en/github/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github), or merge the master branch into your pull request branch using git.
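For example, a minimal sketch of the second option -- merging the upstream master branch into your checked-out pull request branch locally (the remote name `upstream` is an assumption; adjust it to your setup):

```bash
# The remote name "upstream" is an assumption -- adjust it to your own configuration.
git fetch upstream master
# Merge into the currently checked-out pull request branch and resolve any conflicts.
git merge upstream/master
# Push the resulting merge commit back to the pull request branch.
git push
```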
## Docs Check {#docs-check}
Tries to build the ClickHouse documentation website. It can fail if you changed something in the documentation. The most probable reason is that some cross-link in the documentation is wrong. Go to the check report and look for `ERROR` and `WARNING` messages.

### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/eabcc293eb02214caa6826b7c15f101643f67a6b/docs_check.html)
- `docs_output.txt` contains the building log. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/eabcc293eb02214caa6826b7c15f101643f67a6b/docs_check/docs_output.txt)

## Description Check {#description-check}
Checks that the description of your pull request conforms to the [PULL_REQUEST_TEMPLATE.md](https://github.com/ClickHouse/ClickHouse/blob/master/.github/PULL_REQUEST_TEMPLATE.md) template.

You have to specify a changelog category for your change (e.g., Bug Fix), and write a user-readable message describing the change for [CHANGELOG.md](../whats-new/changelog/).

## Push To DockerHub {#push-to-dockerhub}
Builds the docker images used for build and tests, then pushes them to DockerHub.

## Marker Check {#marker-check}
This check means that the CI system has started processing the pull request. While it has the 'pending' status, not all checks have been started yet. After all checks have been started, it changes its status to 'success'.

## Style Check {#style-check}
Performs some simple regex-based checks of the code style, using the `utils/check-style/check-style` binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](./style.md).
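A minimal sketch of running the same check locally (assuming a checked-out ClickHouse source tree; the exact invocation may differ between versions):

```bash
# From the repository root; the script uses git to locate the sources it greps.
./utils/check-style/check-style
```

Any output it prints points at a potential style violation; empty output means the check passes.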
### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `output.txt` contains the check errors (invalid tabulation etc.); a blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt)

## PVS Check {#pvs-check}
Checks the code with [PVS-studio](https://www.viva64.com/en/pvs-studio/), a static analysis tool. Look at the report to see the exact errors. Fix them if you can; if not, ask a ClickHouse maintainer for help.

### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/pvs_check.html)
- `test_run.txt.out.log` contains the building and analyzing log file. It includes only parsing or not-found errors.
- `HTML report` contains the analysis results. For its description, visit PVS's [official site](https://www.viva64.com/en/m/0036/#ID14E9A2B2CD).

## Fast Test {#fast-test}
Normally this is the first check run for a pull request. It builds ClickHouse and runs most of the stateless functional tests, omitting some. If it fails, further checks are not started until it is fixed. Look at the report to see which tests failed, then reproduce the failure locally as described [here](./tests.md#functional-test-locally).

### Report Details {#report-details}
[Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/fast_test.html)

#### Status Page Files {#status-page-files}
- `runlog.out.log` is the general log that includes all other logs.
- `test_log.txt`
- `submodule_log.txt` contains the messages about cloning and checking out the needed submodules.
- `stderr.log`
- `stdout.log`
- `clickhouse-server.log`
- `clone_log.txt`
- `install_log.txt`
- `clickhouse-server.err.log`
- `build_log.txt`
- `cmake_log.txt` contains the messages about the C/C++ and Linux flags checks.

#### Status Page Columns {#status-page-columns}
- Test name -- contains the name of the test (without the path, e.g. all types of tests will be stripped to the name).
- Test status -- one of Skipped, Success, or Fail.
- Test time, sec. -- empty for this test.

## Build Check {#build-check}
Builds ClickHouse in various configurations for use in further steps. You have to fix the builds that fail. Build logs often contain enough information to fix the error, but you might have to reproduce the failure locally. The `cmake` options can be found in the build log by grepping for `cmake`. Use these options and follow the [general build process](./build.md).
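As an illustration, a hedged sketch of pulling the configuration flags out of a downloaded build log and reusing them locally (the file name `build_log.txt` is a placeholder for whatever you downloaded from the report page):

```bash
# Extract the -D... options from the cmake invocation recorded in the build log.
CMAKE_FLAGS=$(grep -oE '[-]D[A-Za-z_]+=[^ ]+' build_log.txt | sort -u | tr '\n' ' ')
mkdir -p build && cd build
# shellcheck disable=SC2086 # the flags are intentionally word-split
cmake $CMAKE_FLAGS ..
ninja
```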
### Report Details {#report-details}
[Status page example](https://clickhouse-builds.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/clickhouse_build_check/report.html)
- **Compiler**: `gcc-9` or `clang-10` (or `clang-10-xx` for other architectures, e.g. `clang-10-freebsd`).
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Bundled**: `bundled` build uses libraries from `contrib`, while `unbundled` build uses system libraries.
- **Splitted**: `splitted` is a [split build](https://clickhouse.com/docs/en/development/build/#split-build).
- **Status**: `success` or `fail`.
- **Build log**: link to the building and files copying log, useful when the build failed.
- **Build time**.
- **Artifacts**: build result files (with `XXX` being the server version, e.g. `20.8.1.4344`).
  - `clickhouse-client_XXX_all.deb`
  - `clickhouse-common-static-dbg_XXX[+asan, +msan, +ubsan, +tsan]_amd64.deb`
  - `clickhouse-common-static_XXX_amd64.deb`
  - `clickhouse-server_XXX_all.deb`
  - `clickhouse-test_XXX_all.deb`
  - `clickhouse_XXX_amd64.buildinfo`
  - `clickhouse_XXX_amd64.changes`
  - `clickhouse`: Main built binary.
  - `clickhouse-odbc-bridge`
  - `unit_tests_dbms`: GoogleTest binary with ClickHouse unit tests.
  - `shared_build.tgz`: build with shared libraries.
  - `performance.tgz`: special package for performance tests.

## Special Build Check {#special-buildcheck}
Performs static analysis and code style checks using `clang-tidy`. The report is similar to the Build Check report. Fix the errors found in the build log.

## Functional Stateless Tests {#functional-stateless-tests}
Runs [stateless functional tests](./tests.md#functional-tests) for ClickHouse binaries built in various configurations -- release, debug, with sanitizers, etc. Look at the report to see which tests failed, then reproduce the failure locally as described [here](./tests.md#functional-test-locally). Note that you have to use the correct build configuration to reproduce -- a test might fail under AddressSanitizer but pass in Debug. Download the binary from the [CI build checks page](./build.md#you-dont-have-to-build-clickhouse), or build it locally.
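A minimal sketch of reproducing one failing test locally with the `clickhouse-test` driver from the repository (the test name is a placeholder; a locally running server built in the matching configuration is assumed):

```bash
cd tests
# 01234_some_failing_test is a placeholder for the test name taken from the check report.
./clickhouse-test 01234_some_failing_test
```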
## Functional Stateful Tests {#functional-stateful-tests}
Runs [stateful functional tests](./tests.md#functional-tests). Treat them in the same way as the stateless functional tests. The difference is that they require the `hits` and `visits` tables from the [Yandex.Metrica dataset](https://clickhouse.com/docs/en/getting-started/example-datasets/metrica/) to run.

## Integration Tests {#integration-tests}
Runs [integration tests](./tests.md#integration-tests).

## Testflows Check {#testflows-check}
Runs some tests using the Testflows test system. See [here](https://github.com/ClickHouse/ClickHouse/tree/master/tests/testflows#running-tests-locally) how to run them locally.

## Stress Test {#stress-test}
Runs stateless functional tests concurrently from several clients to detect concurrency-related errors. If it fails:

* Fix all other test failures first;
* Look at the report to find the server logs and check them for possible causes of error.

## Split Build Smoke Test {#split-build-smoke-test}
Checks that the server built in the [split build](./build.md#split-build) configuration can start and run simple queries. If it fails:

* Fix other test errors first;
* Build the server in the [split build](./build.md#split-build) configuration locally and check whether it can start and run `select 1`.

## Compatibility Check {#compatibility-check}
Checks that the `clickhouse` binary runs on distributions with old libc versions. If it fails, ask a maintainer for help.

## AST Fuzzer {#ast-fuzzer}
Runs randomly generated queries to catch program errors. If it fails, ask a maintainer for help.

## Performance Tests {#performance-tests}
Measures changes in query performance. This is the longest check, taking just below 6 hours to run. The performance test report is described in detail [here](https://github.com/ClickHouse/ClickHouse/tree/master/docker/test/performance-comparison#how-to-read-the-report).

## QA {#qa}
What is a "Task (private network)" item on the status pages?

It's a link to the Yandex internal job system. Yandex employees can see the check's start time and its more verbose status.

Where the tests are run?

Somewhere in the Yandex internal infrastructure.
@@ -1 +0,0 @@
../../../en/operations/external-authenticators/kerberos.md

105  docs/zh/operations/external-authenticators/kerberos.md (new file)
@@ -0,0 +1,105 @@
# Kerberos Authentication {#external-authenticators-kerberos}
Existing and properly configured ClickHouse users can be authenticated via the Kerberos authentication protocol.

Currently, Kerberos can only be used as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths.
Those users may only use HTTP requests and must be able to authenticate using the GSS-SPNEGO mechanism.

For this approach, Kerberos must be configured in the system and must be enabled in the ClickHouse config.

## Enabling Kerberos in ClickHouse {#enabling-kerberos-in-clickHouse}
To enable Kerberos, one should include a `kerberos` section in `config.xml`. This section may contain additional parameters.

#### Parameters: {#parameters}
- `principal` - canonical service principal name that will be acquired and used when accepting security contexts.
  - This parameter is optional; if omitted, the default principal will be used.

- `realm` - a realm that will be used to restrict authentication to only those requests whose initiator's realm matches it.
  - This parameter is optional; if omitted, no additional filtering by realm will be applied.

Example (goes into `config.xml`):
```xml
<yandex>
    <!-- ... -->
    <kerberos />
</yandex>
```

With principal specification:
```xml
<yandex>
    <!-- ... -->
    <kerberos>
        <principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
    </kerberos>
</yandex>
```

With filtering by realm:
```xml
<yandex>
    <!-- ... -->
    <kerberos>
        <realm>EXAMPLE.COM</realm>
    </kerberos>
</yandex>
```

!!! warning "Note"

    You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication.

!!! warning "Note"

    The `principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` sections will force ClickHouse to disable Kerberos authentication.

## Kerberos as an External Authenticator for Existing Users {#kerberos-as-an-external-authenticator-for-existing-users}
Kerberos can be used as a method for verifying the identity of locally defined users (users defined in `users.xml` or in local access control paths). Currently, **only** requests over the HTTP interface can be authenticated (via the GSS-SPNEGO mechanism).

The Kerberos principal name format usually follows this pattern:
- *primary/instance@REALM*

The */instance* part may occur zero or more times. **The primary part of the canonical principal name of the initiator is expected to match the authenticated user name for authentication to succeed.**
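For illustration, a hedged sketch of such an authentication round trip from a client host (the host name, user, and realm are examples; `curl --negotiate` drives the GSS-SPNEGO exchange against the HTTP interface on the default port 8123):

```bash
# Obtain a Kerberos ticket whose primary part matches the ClickHouse user name.
kinit my_user@EXAMPLE.COM
# Let curl perform the GSS-SPNEGO negotiation and run a trivial query.
echo 'SELECT currentUser()' | curl --negotiate -u : 'http://clickhouse.example.com:8123/' --data-binary @-
```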
### Enabling Kerberos in `users.xml` {#enabling-kerberos-in-users-xml}
In order to enable Kerberos authentication for a user, specify a `kerberos` section instead of `password` or similar sections in the user definition.

Parameters:
- `realm` - a realm that will be used to restrict authentication to only those requests whose initiator's realm matches it.
  - This parameter is optional; if omitted, no additional filtering by realm will be applied.

Example (goes into `users.xml`):
```
<yandex>
    <!-- ... -->
    <users>
        <!-- ... -->
        <my_user>
            <!-- ... -->
            <kerberos>
                <realm>EXAMPLE.COM</realm>
            </kerberos>
        </my_user>
    </users>
</yandex>
```

!!! warning "Warning"

    Note that Kerberos authentication cannot be used alongside any other authentication mechanism. The presence of any other section, like `password`, alongside `kerberos` will force ClickHouse to shut down.

!!! info "Reminder"

    Note that now, once the user `my_user` uses `kerberos`, Kerberos must be enabled in the main `config.xml` file as described previously.

### Enabling Kerberos using SQL {#enabling-kerberos-using-sql}
When [SQL-driven Access Control and Account Management](https://clickhouse.com/docs/en/operations/access-rights/#access-control) is enabled in ClickHouse, users identified by Kerberos can also be created using SQL statements.

```sql
CREATE USER my_user IDENTIFIED WITH kerberos REALM 'EXAMPLE.COM'
```

...or, without filtering by realm:
```sql
CREATE USER my_user IDENTIFIED WITH kerberos
```
@@ -5,4 +5,34 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3

## system.asynchronous_metric_log {#system-tables-async-log}

Contains the historical values of `system.asynchronous_log` (see [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics))
Contains the historical values of `system.asynchronous_metrics`, which are saved once per minute. Enabled by default.

Columns:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution.
- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.

**Example**
``` sql
SELECT * FROM system.asynchronous_metric_log LIMIT 10
```
``` text
┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │
└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘
```

**See Also**
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains metrics that are calculated periodically in the background.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from the tables `system.metrics` and `system.events`, periodically flushed to disk.
|
||||
#include <Common/Macros.h>
|
||||
#include <Common/Config/ConfigProcessor.h>
|
||||
#include <Common/ThreadStatus.h>
|
||||
#include <Common/TLDListsHolder.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Common/randomSeed.h>
|
||||
#include <loggers/Loggers.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
@ -35,7 +37,6 @@
|
||||
#include <Formats/registerFormats.h>
|
||||
#include <boost/program_options/options_description.hpp>
|
||||
#include <base/argsToConfig.h>
|
||||
#include <Common/randomSeed.h>
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
@ -179,7 +180,6 @@ void LocalServer::initialize(Poco::Util::Application & self)
|
||||
ConfigProcessor config_processor(config_path, false, true);
|
||||
config_processor.setConfigPath(fs::path(config_path).parent_path());
|
||||
auto loaded_config = config_processor.loadConfig();
|
||||
config_processor.savePreprocessedConfig(loaded_config, loaded_config.configuration->getString("path", "."));
|
||||
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
|
||||
}
|
||||
|
||||
@ -284,6 +284,11 @@ void LocalServer::tryInitPath()
|
||||
global_context->setFlagsPath(path + "flags");
|
||||
|
||||
global_context->setUserFilesPath(""); // user's files are everywhere
|
||||
|
||||
/// top_level_domains_lists
|
||||
const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/");
|
||||
if (!top_level_domains_path.empty())
|
||||
TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config());
|
||||
}
|
||||
|
||||
|
||||
@ -380,7 +385,6 @@ void LocalServer::setupUsers()
|
||||
const auto users_config_path = config().getString("users_config", config().getString("config-file", "config.xml"));
|
||||
ConfigProcessor config_processor(users_config_path);
|
||||
const auto loaded_config = config_processor.loadConfig();
|
||||
config_processor.savePreprocessedConfig(loaded_config, config().getString("path", DBMS_DEFAULT_PATH));
|
||||
users_config = loaded_config.configuration;
|
||||
}
|
||||
else
|
||||
@ -673,6 +677,7 @@ void LocalServer::addOptions(OptionsDescription & options_description)
|
||||
|
||||
("no-system-tables", "do not attach system tables (better startup time)")
|
||||
("path", po::value<std::string>(), "Storage path")
|
||||
("top_level_domains_path", po::value<std::string>(), "Path to lists with custom TLDs")
|
||||
;
|
||||
}
|
||||
|
||||
|
@@ -960,9 +960,14 @@ if (ThreadFuzzer::instance().isEffective())
global_context->setMMappedFileCache(mmap_cache_size);

#if USE_EMBEDDED_COMPILER
/// 128 MB
constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128;
size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size);

constexpr size_t compiled_expression_cache_elements_size_default = 10000;
size_t compiled_expression_cache_elements_size = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default);

CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size);
#endif

/// Set path for format schema files
@@ -351,9 +351,12 @@
-->
<mmap_cache_size>1000</mmap_cache_size>

<!-- Cache size for compiled expressions.-->
<!-- Cache size in bytes for compiled expressions.-->
<compiled_expression_cache_size>134217728</compiled_expression_cache_size>

<!-- Cache size in elements for compiled expressions.-->
<compiled_expression_cache_elements_size>10000</compiled_expression_cache_elements_size>

<!-- Path to data directory, with trailing slash. -->
<path>/var/lib/clickhouse/</path>

@@ -279,9 +279,12 @@ mark_cache_size: 5368709120
# also it can be dropped manually by the SYSTEM DROP MMAP CACHE query.
mmap_cache_size: 1000

# Cache size for compiled expressions.
# Cache size in bytes for compiled expressions.
compiled_expression_cache_size: 134217728

# Cache size in elements for compiled expressions.
compiled_expression_cache_elements_size: 10000

# Path to data directory, with trailing slash.
path: /var/lib/clickhouse/

@@ -36,12 +36,13 @@ public:
using Mapped = TMapped;
using MappedPtr = std::shared_ptr<Mapped>;

private:
using Clock = std::chrono::steady_clock;

public:
LRUCache(size_t max_size_)
    : max_size(std::max(static_cast<size_t>(1), max_size_)) {}
/** Initialize LRUCache with max_size and max_elements_size.
  * max_elements_size == 0 means no elements size restrictions.
  */
LRUCache(size_t max_size_, size_t max_elements_size_ = 0)
    : max_size(std::max(static_cast<size_t>(1), max_size_))
    , max_elements_size(max_elements_size_)
{}

MappedPtr get(const Key & key)
{
@@ -252,6 +253,7 @@ private:
/// Total weight of values.
size_t current_size = 0;
const size_t max_size;
const size_t max_elements_size;

std::atomic<size_t> hits {0};
std::atomic<size_t> misses {0};
@@ -311,7 +313,8 @@ private:
{
size_t current_weight_lost = 0;
size_t queue_size = cells.size();
while ((current_size > max_size) && (queue_size > 1))

while ((current_size > max_size || (max_elements_size != 0 && queue_size > max_elements_size)) && (queue_size > 1))
{
const Key & key = queue.front();

@@ -227,7 +227,8 @@
M(CreatedHTTPConnections, "Total amount of created HTTP connections (counter increase every time connection is created).") \
\
M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \
M(QueryProfilerSignalOverruns, "Number of times we drop processing of a signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \
M(QueryProfilerSignalOverruns, "Number of times we drop processing of a query profiler signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \
M(QueryProfilerRuns, "Number of times QueryProfiler had been run.") \
\
M(CreatedLogEntryForMerge, "Successfully created log entry to merge parts in ReplicatedMergeTree.") \
M(NotCreatedLogEntryForMerge, "Log entry to merge parts in ReplicatedMergeTree is not created due to concurrent log update by another replica.") \
@@ -14,17 +14,19 @@

namespace
{
constexpr UInt64 ZERO = 0;
constexpr UInt64 ALL_THREADS = 0;

UInt64 calculateNewCoresNumber(DB::ThreadIdToTimeMap const & prev, DB::ThreadIdToTimeMap const& next)
{
if (next.find(ZERO) == next.end())
    return ZERO;
auto accumulated = std::accumulate(next.cbegin(), next.cend(), ZERO,
    [&prev](UInt64 acc, auto const & elem)
if (next.find(ALL_THREADS) == next.end())
    return 0;

auto accumulated = std::accumulate(next.cbegin(), next.cend(), 0,
    [&prev](UInt64 acc, const auto & elem)
    {
        if (elem.first == ZERO)
        if (elem.first == ALL_THREADS)
            return acc;

        auto thread_time = elem.second.time();
        auto it = prev.find(elem.first);
        if (it != prev.end())
@@ -32,9 +34,9 @@ namespace
        return acc + thread_time;
    });

auto elapsed = next.at(ZERO).time() - (prev.contains(ZERO) ? prev.at(ZERO).time() : ZERO);
if (elapsed == ZERO)
    return ZERO;
auto elapsed = next.at(ALL_THREADS).time() - (prev.contains(ALL_THREADS) ? prev.at(ALL_THREADS).time() : 0);
if (elapsed == 0)
    return 0;
return (accumulated + elapsed - 1) / elapsed;
}
}
@@ -109,7 +111,7 @@ size_t ProgressIndication::getUsedThreadsCount() const

UInt64 ProgressIndication::getApproximateCoresNumber() const
{
return std::accumulate(host_active_cores.cbegin(), host_active_cores.cend(), ZERO,
return std::accumulate(host_active_cores.cbegin(), host_active_cores.cend(), 0,
    [](UInt64 acc, auto const & elem)
    {
        return acc + elem.second;
@@ -121,11 +123,12 @@ ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const
return std::accumulate(thread_data.cbegin(), thread_data.cend(), MemoryUsage{},
    [](MemoryUsage const & acc, auto const & host_data)
    {
        auto host_usage = std::accumulate(host_data.second.cbegin(), host_data.second.cend(), ZERO,
            [](UInt64 memory, auto const & data)
            {
                return memory + data.second.memory_usage;
            });
        UInt64 host_usage = 0;
        // In ProfileEvents packets thread id 0 specifies common profiling information
        // for all threads executing current query on specific host. So instead of summing per thread
        // memory consumption it's enough to look for data with thread id 0.
        if (auto it = host_data.second.find(ALL_THREADS); it != host_data.second.end())
            host_usage = it->second.memory_usage;
        return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage)};
    });
}
@@ -15,6 +15,7 @@
namespace ProfileEvents
{
extern const Event QueryProfilerSignalOverruns;
extern const Event QueryProfilerRuns;
}

namespace DB
@@ -60,6 +61,7 @@ namespace
const StackTrace stack_trace(signal_context);

TraceCollector::collect(trace_type, stack_trace, 0);
ProfileEvents::increment(ProfileEvents::QueryProfilerRuns);

errno = saved_errno;
}
@@ -12,7 +12,9 @@
/// Minimum revision with exactly the same set of aggregation methods and rules to select them.
/// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules
/// (keys will be placed in different buckets and result will not be fully aggregated).
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54431
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54456
#define DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 21
#define DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 4
#define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410

#define DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE 54405
@@ -43,13 +43,13 @@ private:
class FunctionBaseNow : public IFunctionBase
{
public:
explicit FunctionBaseNow(time_t time_, DataTypePtr return_type_) : time_value(time_), return_type(return_type_) {}
explicit FunctionBaseNow(time_t time_, DataTypes argument_types_, DataTypePtr return_type_)
    : time_value(time_), argument_types(std::move(argument_types_)), return_type(std::move(return_type_)) {}

String getName() const override { return "now"; }

const DataTypes & getArgumentTypes() const override
{
    static const DataTypes argument_types;
    return argument_types;
}

@@ -69,6 +69,7 @@ public:

private:
time_t time_value;
DataTypes argument_types;
DataTypePtr return_type;
};

@@ -117,8 +118,10 @@ public:
}
if (arguments.size() == 1)
    return std::make_unique<FunctionBaseNow>(
        time(nullptr), std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 0, 0)));
return std::make_unique<FunctionBaseNow>(time(nullptr), std::make_shared<DataTypeDateTime>());
        time(nullptr), DataTypes{arguments.front().type},
        std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 0, 0)));

return std::make_unique<FunctionBaseNow>(time(nullptr), DataTypes(), std::make_shared<DataTypeDateTime>());
}
};
@@ -67,13 +67,13 @@ private:
class FunctionBaseNow64 : public IFunctionBase
{
public:
explicit FunctionBaseNow64(Field time_, DataTypePtr return_type_) : time_value(time_), return_type(return_type_) {}
explicit FunctionBaseNow64(Field time_, DataTypes argument_types_, DataTypePtr return_type_)
    : time_value(time_), argument_types(std::move(argument_types_)), return_type(std::move(return_type_)) {}

String getName() const override { return "now64"; }

const DataTypes & getArgumentTypes() const override
{
    static const DataTypes argument_types;
    return argument_types;
}

@@ -93,6 +93,7 @@ public:

private:
Field time_value;
DataTypes argument_types;
DataTypePtr return_type;
};

@@ -139,14 +140,19 @@ public:
return std::make_shared<DataTypeDateTime64>(scale, timezone_name);
}

FunctionBasePtr buildImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type) const override
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override
{
UInt32 scale = DataTypeDateTime64::default_scale;
auto res_type = removeNullable(result_type);
if (const auto * type = typeid_cast<const DataTypeDateTime64 *>(res_type.get()))
    scale = type->getScale();

return std::make_unique<FunctionBaseNow64>(nowSubsecond(scale), result_type);
DataTypes arg_types;
arg_types.reserve(arguments.size());
for (const auto & arg : arguments)
    arg_types.push_back(arg.type);

return std::make_unique<FunctionBaseNow64>(nowSubsecond(scale), std::move(arg_types), std::move(result_type));
}
};

@@ -407,14 +407,20 @@ try
}

StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform));
std::unique_ptr<ReadBuffer> buffer;
std::unique_ptr<ReadBuffer> last_buffer;
for (const auto & entry : data->entries)
{
buffer = std::make_unique<ReadBufferFromString>(entry->bytes);
auto buffer = std::make_unique<ReadBufferFromString>(entry->bytes);
current_entry = entry;
total_rows += executor.execute(*buffer);

/// Keep buffer, because it still can be used
/// in destructor, while resetting buffer at next iteration.
last_buffer = std::move(buffer);
}

format->addBuffer(std::move(last_buffer));

auto chunk = Chunk(executor.getResultColumns(), total_rows);
size_t total_bytes = chunk.bytes();

@@ -84,6 +84,14 @@ public:
UInt64 client_version_patch = 0;
unsigned client_tcp_protocol_version = 0;

/// In case of distributed query, client info for query is actually a client info of client.
/// In order to get a version of server-initiator, use connection_ values.
/// Also for tcp only.
UInt64 connection_client_version_major = 0;
UInt64 connection_client_version_minor = 0;
UInt64 connection_client_version_patch = 0;
unsigned connection_tcp_protocol_version = 0;

/// For http
HTTPMethod http_method = HTTPMethod::UNKNOWN;
String http_user_agent;
@@ -69,6 +69,7 @@

#include <Functions/IFunction.h>
#include <Core/Field.h>
#include <Core/ProtocolDefines.h>
#include <base/types.h>
#include <Columns/Collator.h>
#include <Common/FieldVisitorsAccurateComparison.h>
@@ -2556,6 +2557,19 @@ void InterpreterSelectQuery::initSettings()
auto & query = getSelectQuery();
if (query.settings())
    InterpreterSetQuery(query.settings(), context).executeForCurrentContext();

auto & client_info = context->getClientInfo();
auto min_major = DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD;
auto min_minor = DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD;

if (client_info.query_kind == ClientInfo::QueryKind::SECONDARY_QUERY &&
    std::forward_as_tuple(client_info.connection_client_version_major, client_info.connection_client_version_minor) < std::forward_as_tuple(min_major, min_minor))
{
    /// Disable two-level aggregation due to version incompatibility.
    context->setSetting("group_by_two_level_threshold", Field(0));
    context->setSetting("group_by_two_level_threshold_bytes", Field(0));

}
}

}
@@ -16,12 +16,12 @@ CompiledExpressionCacheFactory & CompiledExpressionCacheFactory::instance()
return factory;
}

void CompiledExpressionCacheFactory::init(size_t cache_size)
void CompiledExpressionCacheFactory::init(size_t cache_size_in_bytes, size_t cache_size_in_elements)
{
if (cache)
    throw Exception(ErrorCodes::LOGICAL_ERROR, "CompiledExpressionCache was already initialized");

cache = std::make_unique<CompiledExpressionCache>(cache_size);
cache = std::make_unique<CompiledExpressionCache>(cache_size_in_bytes, cache_size_in_elements);
}

CompiledExpressionCache * CompiledExpressionCacheFactory::tryGetCache()
@@ -52,7 +52,7 @@ private:
public:
static CompiledExpressionCacheFactory & instance();

void init(size_t cache_size);
void init(size_t cache_size_in_bytes, size_t cache_size_in_elements);
CompiledExpressionCache * tryGetCache();
};

@@ -7,4 +7,3 @@ target_link_libraries(execute_query_fuzzer PRIVATE
    clickhouse_dictionaries
    clickhouse_dictionaries_embedded
    ${LIB_FUZZING_ENGINE})

@@ -131,18 +131,22 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
}

Pos before_values = pos;
String format_str;

/// VALUES or FROM INFILE or FORMAT or SELECT
if (!infile && s_values.ignore(pos, expected))
{
/// If VALUES is defined in query, everything except setting will be parsed as data
data = pos->begin;
format_str = "Values";
}
else if (s_format.ignore(pos, expected))
{
/// If FORMAT is defined, read format name
if (!name_p.parse(pos, format, expected))
    return false;

tryGetIdentifierNameInto(format, format_str);
}
else if (s_select.ignore(pos, expected) || s_with.ignore(pos,expected))
{
@@ -155,6 +159,8 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
/// FORMAT section is expected if we have input() in SELECT part
if (s_format.ignore(pos, expected) && !name_p.parse(pos, format, expected))
    return false;

tryGetIdentifierNameInto(format, format_str);
}
else if (s_watch.ignore(pos, expected))
{
@@ -242,9 +248,8 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
    tryGetIdentifierNameInto(table, query->table_id.table_name);
}

tryGetIdentifierNameInto(format, query->format);

query->columns = columns;
query->format = std::move(format_str);
query->select = select;
query->watch = watch;
query->settings_ast = settings_ast;
@@ -45,4 +45,4 @@ set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${Protobuf_INCLUDE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}")
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}")
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}/src")
target_link_libraries(codegen_select_fuzzer PRIVATE protobuf-mutator dbms ${LIB_FUZZING_ENGINE})
target_link_libraries(codegen_select_fuzzer PRIVATE protobuf-mutator ${Protobuf_LIBRARY} ${Protobuf_PROTOC_LIBRARY} dbms ${LIB_FUZZING_ENGINE})
@@ -55,6 +55,8 @@ public:
  */
virtual void resetParser();

virtual void setReadBuffer(ReadBuffer & in_);

virtual const BlockMissingValues & getMissingValues() const
{
    static const BlockMissingValues none;
@@ -70,7 +72,6 @@ public:
void setCurrentUnitNumber(size_t current_unit_number_) { current_unit_number = current_unit_number_; }

void addBuffer(std::unique_ptr<ReadBuffer> buffer) { owned_buffers.emplace_back(std::move(buffer)); }
void setReadBuffer(ReadBuffer & in_);

protected:
ColumnMappingPtr column_mapping{};
@@ -32,13 +32,25 @@ namespace ErrorCodes
}


ValuesBlockInputFormat::ValuesBlockInputFormat(ReadBuffer & in_, const Block & header_, const RowInputFormatParams & params_,
    const FormatSettings & format_settings_)
    : IInputFormat(header_, buf), buf(in_), params(params_),
    format_settings(format_settings_), num_columns(header_.columns()),
    parser_type_for_column(num_columns, ParserType::Streaming),
    attempts_to_deduce_template(num_columns), attempts_to_deduce_template_cached(num_columns),
    rows_parsed_using_template(num_columns), templates(num_columns), types(header_.getDataTypes())
ValuesBlockInputFormat::ValuesBlockInputFormat(
    ReadBuffer & in_,
    const Block & header_,
    const RowInputFormatParams & params_,
    const FormatSettings & format_settings_)
    : ValuesBlockInputFormat(std::make_unique<PeekableReadBuffer>(in_), header_, params_, format_settings_)
{
}

ValuesBlockInputFormat::ValuesBlockInputFormat(
    std::unique_ptr<PeekableReadBuffer> buf_,
    const Block & header_,
    const RowInputFormatParams & params_,
    const FormatSettings & format_settings_)
    : IInputFormat(header_, *buf_), buf(std::move(buf_)),
    params(params_), format_settings(format_settings_), num_columns(header_.columns()),
    parser_type_for_column(num_columns, ParserType::Streaming),
    attempts_to_deduce_template(num_columns), attempts_to_deduce_template_cached(num_columns),
    rows_parsed_using_template(num_columns), templates(num_columns), types(header_.getDataTypes())
{
serializations.resize(types.size());
for (size_t i = 0; i < types.size(); ++i)
@@ -58,8 +70,8 @@ Chunk ValuesBlockInputFormat::generate()
{
try
{
skipWhitespaceIfAny(buf);
if (buf.eof() || *buf.position() == ';')
skipWhitespaceIfAny(*buf);
if (buf->eof() || *buf->position() == ';')
    break;
readRow(columns, rows_in_block);
}
@@ -99,12 +111,12 @@ Chunk ValuesBlockInputFormat::generate()

void ValuesBlockInputFormat::readRow(MutableColumns & columns, size_t row_num)
{
assertChar('(', buf);
assertChar('(', *buf);

for (size_t column_idx = 0; column_idx < num_columns; ++column_idx)
{
skipWhitespaceIfAny(buf);
PeekableReadBufferCheckpoint checkpoint{buf};
skipWhitespaceIfAny(*buf);
PeekableReadBufferCheckpoint checkpoint{*buf};
bool read;

/// Parse value using fast streaming parser for literals and slow SQL parser for expressions.
@@ -123,9 +135,9 @@ void ValuesBlockInputFormat::readRow(MutableColumns & columns, size_t row_num)
/// If read is true, value still may be missing. Bit mask for these values will be copied from ConstantExpressionTemplate later.
}

skipWhitespaceIfAny(buf);
if (!buf.eof() && *buf.position() == ',')
    ++buf.position();
skipWhitespaceIfAny(*buf);
if (!buf->eof() && *buf->position() == ',')
    ++buf->position();

++total_rows;
}
@@ -134,7 +146,7 @@ bool ValuesBlockInputFormat::tryParseExpressionUsingTemplate(MutableColumnPtr &
{
/// Try to parse expression using template if one was successfully deduced while parsing the first row
auto settings = context->getSettingsRef();
if (templates[column_idx]->parseExpression(buf, format_settings, settings))
if (templates[column_idx]->parseExpression(*buf, format_settings, settings))
{
    ++rows_parsed_using_template[column_idx];
    return true;
@@ -154,7 +166,7 @@ bool ValuesBlockInputFormat::tryParseExpressionUsingTemplate(MutableColumnPtr &
}
/// Do not use this template anymore
templates[column_idx].reset();
buf.rollbackToCheckpoint();
buf->rollbackToCheckpoint();

/// It will deduce new template or fallback to slow SQL parser
return parseExpression(*column, column_idx);
@@ -169,13 +181,13 @@ bool ValuesBlockInputFormat::tryReadValue(IColumn & column, size_t column_idx)
const auto & type = types[column_idx];
const auto & serialization = serializations[column_idx];
if (format_settings.null_as_default && !type->isNullable())
    read = SerializationNullable::deserializeTextQuotedImpl(column, buf, format_settings, serialization);
    read = SerializationNullable::deserializeTextQuotedImpl(column, *buf, format_settings, serialization);
else
    serialization->deserializeTextQuoted(column, buf, format_settings);
    serialization->deserializeTextQuoted(column, *buf, format_settings);

rollback_on_exception = true;

skipWhitespaceIfAny(buf);
skipWhitespaceIfAny(*buf);
assertDelimiterAfterValue(column_idx);
return read;
}
@@ -190,7 +202,7 @@ bool ValuesBlockInputFormat::tryReadValue(IColumn & column, size_t column_idx)

/// Switch to SQL parser and don't try to use streaming parser for complex expressions
/// Note: Throwing exceptions for each expression may be very slow because of stacktraces
buf.rollbackToCheckpoint();
buf->rollbackToCheckpoint();
return parseExpression(column, column_idx);
}
}
@@ -284,11 +296,11 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx

/// We need continuous memory containing the expression to use Lexer
skipToNextRow(0, 1);
buf.makeContinuousMemoryFromCheckpointToPos();
buf.rollbackToCheckpoint();
buf->makeContinuousMemoryFromCheckpointToPos();
buf->rollbackToCheckpoint();

Expected expected;
Tokens tokens(buf.position(), buf.buffer().end());
Tokens tokens(buf->position(), buf->buffer().end());
IParser::Pos token_iterator(tokens, settings.max_parser_depth);
ASTPtr ast;

@@ -302,7 +314,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx

if (!parsed)
    throw Exception("Cannot parse expression of type " + type.getName() + " here: "
        + String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position())),
        + String(buf->position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position())),
        ErrorCodes::SYNTAX_ERROR);
++token_iterator;

@@ -316,9 +328,9 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
try
{
    const auto & serialization = serializations[column_idx];
    serialization->deserializeTextQuoted(column, buf, format_settings);
    serialization->deserializeTextQuoted(column, *buf, format_settings);
    rollback_on_exception = true;
    skipWhitespaceIfAny(buf);
    skipWhitespaceIfAny(*buf);
    if (checkDelimiterAfterValue(column_idx))
        ok = true;
}
@@ -366,8 +378,8 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
else
    ++attempts_to_deduce_template[column_idx];

buf.rollbackToCheckpoint();
if (templates[column_idx]->parseExpression(buf, format_settings, settings))
buf->rollbackToCheckpoint();
if (templates[column_idx]->parseExpression(*buf, format_settings, settings))
{
    ++rows_parsed_using_template[column_idx];
    parser_type_for_column[column_idx] = ParserType::BatchTemplate;
@@ -384,9 +396,9 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
    std::rethrow_exception(exception);
else
{
    buf.rollbackToCheckpoint();
    size_t len = const_cast<char *>(token_iterator->begin) - buf.position();
    throw Exception("Cannot deduce template of expression: " + std::string(buf.position(), len), ErrorCodes::SYNTAX_ERROR);
    buf->rollbackToCheckpoint();
    size_t len = const_cast<char *>(token_iterator->begin) - buf->position();
    throw Exception("Cannot deduce template of expression: " + std::string(buf->position(), len), ErrorCodes::SYNTAX_ERROR);
}
}
/// Continue parsing without template
@@ -397,7 +409,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
    throw Exception("Interpreting expressions is disabled", ErrorCodes::SUPPORT_IS_DISABLED);

/// Try to evaluate single expression if other parsers don't work
buf.position() = const_cast<char *>(token_iterator->begin);
buf->position() = const_cast<char *>(token_iterator->begin);

std::pair<Field, DataTypePtr> value_raw = evaluateConstantExpression(ast, context);

@@ -416,10 +428,10 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
    type.insertDefaultInto(column);
    return false;
}
buf.rollbackToCheckpoint();
buf->rollbackToCheckpoint();
throw Exception{"Cannot insert NULL value into a column of type '" + type.getName() + "'"
    + " at: " +
    String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position())),
    String(buf->position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position())),
    ErrorCodes::TYPE_MISMATCH};
}

@@ -430,61 +442,61 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
/// Can be used in fileSegmentationEngine for parallel parsing of Values
bool ValuesBlockInputFormat::skipToNextRow(size_t min_chunk_bytes, int balance)
{
skipWhitespaceIfAny(buf);
if (buf.eof() || *buf.position() == ';')
skipWhitespaceIfAny(*buf);
if (buf->eof() || *buf->position() == ';')
    return false;
bool quoted = false;

size_t chunk_begin_buf_count = buf.count();
while (!buf.eof() && (balance || buf.count() - chunk_begin_buf_count < min_chunk_bytes))
size_t chunk_begin_buf_count = buf->count();
while (!buf->eof() && (balance || buf->count() - chunk_begin_buf_count < min_chunk_bytes))
{
    buf.position() = find_first_symbols<'\\', '\'', ')', '('>(buf.position(), buf.buffer().end());
    if (buf.position() == buf.buffer().end())
    buf->position() = find_first_symbols<'\\', '\'', ')', '('>(buf->position(), buf->buffer().end());
    if (buf->position() == buf->buffer().end())
        continue;
    if (*buf.position() == '\\')
    if (*buf->position() == '\\')
    {
        ++buf.position();
        if (!buf.eof())
            ++buf.position();
        ++buf->position();
        if (!buf->eof())
            ++buf->position();
    }
    else if (*buf.position() == '\'')
    else if (*buf->position() == '\'')
    {
        quoted ^= true;
        ++buf.position();
        ++buf->position();
    }
    else if (*buf.position() == ')')
    else if (*buf->position() == ')')
    {
        ++buf.position();
        ++buf->position();
        if (!quoted)
            --balance;
    }
    else if (*buf.position() == '(')
    else if (*buf->position() == '(')
    {
        ++buf.position();
        ++buf->position();
        if (!quoted)
            ++balance;
    }
}

if (!buf.eof() && *buf.position() == ',')
    ++buf.position();
if (!buf->eof() && *buf->position() == ',')
    ++buf->position();
return true;
}

void ValuesBlockInputFormat::assertDelimiterAfterValue(size_t column_idx)
{
if (unlikely(!checkDelimiterAfterValue(column_idx)))
    throwAtAssertionFailed((column_idx + 1 == num_columns) ? ")" : ",", buf);
    throwAtAssertionFailed((column_idx + 1 == num_columns) ? ")" : ",", *buf);
}

bool ValuesBlockInputFormat::checkDelimiterAfterValue(size_t column_idx)
{
skipWhitespaceIfAny(buf);
skipWhitespaceIfAny(*buf);

if (likely(column_idx + 1 != num_columns))
    return checkChar(',', buf);
    return checkChar(',', *buf);
else
    return checkChar(')', buf);
    return checkChar(')', *buf);
}

bool ValuesBlockInputFormat::shouldDeduceNewTemplate(size_t column_idx)
@@ -516,21 +528,21 @@ bool ValuesBlockInputFormat::shouldDeduceNewTemplate(size_t column_idx)
void ValuesBlockInputFormat::readPrefix()
{
/// In this format, BOM at beginning of stream cannot be confused with value, so it is safe to skip it.
skipBOMIfExists(buf);
skipBOMIfExists(*buf);
}

void ValuesBlockInputFormat::readSuffix()
{
if (!buf.eof() && *buf.position() == ';')
if (!buf->eof() && *buf->position() == ';')
{
    ++buf.position();
    skipWhitespaceIfAny(buf);
    if (buf.hasUnreadData())
    ++buf->position();
    skipWhitespaceIfAny(*buf);
    if (buf->hasUnreadData())
        throw Exception("Cannot read data after semicolon", ErrorCodes::CANNOT_READ_ALL_DATA);
    return;
}

if (buf.hasUnreadData())
if (buf->hasUnreadData())
    throw Exception("Unread data in PeekableReadBuffer will be lost. Most likely it's a bug.", ErrorCodes::LOGICAL_ERROR);
}

@@ -539,10 +551,16 @@ void ValuesBlockInputFormat::resetParser()
IInputFormat::resetParser();
// I'm not resetting parser modes here.
// There is a good chance that all messages have the same format.
buf.reset();
buf->reset();
total_rows = 0;
}

void ValuesBlockInputFormat::setReadBuffer(ReadBuffer & in_)
{
buf = std::make_unique<PeekableReadBuffer>(in_);
IInputFormat::setReadBuffer(*buf);
}

void registerInputFormatValues(FormatFactory & factory)
{
factory.registerInputFormat("Values", [](
@ -32,6 +32,7 @@ public:
|
||||
String getName() const override { return "ValuesBlockInputFormat"; }
|
||||
|
||||
void resetParser() override;
|
||||
void setReadBuffer(ReadBuffer & in_) override;
|
||||
|
||||
/// TODO: remove context somehow.
|
||||
void setContext(ContextPtr context_) { context = Context::createCopy(context_); }
|
||||
@ -39,6 +40,9 @@ public:
|
||||
const BlockMissingValues & getMissingValues() const override { return block_missing_values; }
|
||||
|
||||
private:
|
||||
ValuesBlockInputFormat(std::unique_ptr<PeekableReadBuffer> buf_, const Block & header_, const RowInputFormatParams & params_,
|
||||
const FormatSettings & format_settings_);
|
||||
|
||||
enum class ParserType
|
||||
{
|
||||
Streaming,
|
||||
@ -66,7 +70,7 @@ private:
|
||||
|
||||
bool skipToNextRow(size_t min_chunk_bytes = 0, int balance = 0);
|
||||
|
||||
PeekableReadBuffer buf;
|
||||
std::unique_ptr<PeekableReadBuffer> buf;
|
||||
|
||||
const RowInputFormatParams params;
|
||||
|
||||
|
@ -42,12 +42,12 @@ InputFormatPtr getInputFormatFromASTInsertQuery(
|
||||
if (ast_insert_query->infile && context->getApplicationType() == Context::ApplicationType::SERVER)
|
||||
throw Exception("Query has infile and was send directly to server", ErrorCodes::UNKNOWN_TYPE_OF_QUERY);
|
||||
|
||||
String format = ast_insert_query->format;
|
||||
if (format.empty())
|
||||
if (ast_insert_query->format.empty())
|
||||
{
|
||||
if (input_function)
|
||||
throw Exception("FORMAT must be specified for function input()", ErrorCodes::INVALID_USAGE_OF_INPUT);
|
||||
format = "Values";
|
||||
else
|
||||
throw Exception("Logical error: INSERT query requires format to be set", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
/// Data could be in the already parsed part of the query (ast_insert_query.data) and in the not yet parsed part (input_buffer_tail_part).
|
||||
@ -59,7 +59,7 @@ InputFormatPtr getInputFormatFromASTInsertQuery(
|
||||
: std::make_unique<EmptyReadBuffer>();
|
||||
|
||||
/// Create a source from input buffer using format from query
|
||||
auto source = context->getInputFormat(format, *input_buffer, header, context->getSettings().max_insert_block_size);
|
||||
auto source = context->getInputFormat(ast_insert_query->format, *input_buffer, header, context->getSettings().max_insert_block_size);
|
||||
source->addBuffer(std::move(input_buffer));
|
||||
return source;
|
||||
}
|
||||
|
@ -1126,6 +1126,11 @@ void TCPHandler::receiveHello()
|
||||
client_info.client_version_patch = client_version_patch;
|
||||
client_info.client_tcp_protocol_version = client_tcp_protocol_version;
|
||||
|
||||
client_info.connection_client_version_major = client_version_major;
|
||||
client_info.connection_client_version_minor = client_version_minor;
|
||||
client_info.connection_client_version_patch = client_version_patch;
|
||||
client_info.connection_tcp_protocol_version = client_tcp_protocol_version;
|
||||
|
||||
is_interserver_mode = (user == USER_INTERSERVER_MARKER);
|
||||
if (is_interserver_mode)
|
||||
{
|
||||
|
@ -1044,10 +1044,15 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r
|
||||
/// Filter in WHERE instead
|
||||
else
|
||||
{
|
||||
auto type = getSampleBlock().getByName(prewhere_info->prewhere_column_name).type;
|
||||
ColumnWithTypeAndName col(result.getFilterHolder()->convertToFullColumnIfConst(), std::make_shared<DataTypeUInt8>(), "");
|
||||
result.columns[prewhere_column_pos] = castColumn(col, type);
|
||||
result.clearFilter(); // Acting as a flag to not filter in PREWHERE
|
||||
if (prewhere_info->remove_prewhere_column)
|
||||
result.columns.erase(result.columns.begin() + prewhere_column_pos);
|
||||
else
|
||||
{
|
||||
auto type = getSampleBlock().getByName(prewhere_info->prewhere_column_name).type;
|
||||
ColumnWithTypeAndName col(result.getFilterHolder()->convertToFullColumnIfConst(), std::make_shared<DataTypeUInt8>(), "");
|
||||
result.columns[prewhere_column_pos] = castColumn(col, type);
|
||||
result.clearFilter(); // Acting as a flag to not filter in PREWHERE
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -446,7 +446,8 @@ static void appendBlock(const Block & from, Block & to)
|
||||
if (!to)
|
||||
throw Exception("Cannot append to empty block", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
assertBlocksHaveEqualStructure(from, to, "Buffer");
|
||||
if (to.rows())
|
||||
assertBlocksHaveEqualStructure(from, to, "Buffer");
|
||||
|
||||
from.checkNumberOfRows();
|
||||
to.checkNumberOfRows();
|
||||
@ -464,14 +465,21 @@ static void appendBlock(const Block & from, Block & to)
|
||||
{
|
||||
MemoryTracker::BlockerInThread temporarily_disable_memory_tracker;
|
||||
|
||||
for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no)
|
||||
if (to.rows() == 0)
|
||||
{
|
||||
const IColumn & col_from = *from.getByPosition(column_no).column.get();
|
||||
last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column));
|
||||
to = from;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no)
|
||||
{
|
||||
const IColumn & col_from = *from.getByPosition(column_no).column.get();
|
||||
last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column));
|
||||
|
||||
last_col->insertRangeFrom(col_from, 0, rows);
|
||||
last_col->insertRangeFrom(col_from, 0, rows);
|
||||
|
||||
to.getByPosition(column_no).column = std::move(last_col);
|
||||
to.getByPosition(column_no).column = std::move(last_col);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
|
209
tests/ci/build_check.py
Normal file
@ -0,0 +1,209 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
import subprocess
|
||||
import logging
|
||||
from s3_helper import S3Helper
|
||||
import json
|
||||
import os
|
||||
from pr_info import PRInfo
|
||||
from github import Github
|
||||
import shutil
|
||||
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from version_helper import get_version_from_repo, update_version_local
|
||||
|
||||
|
||||
def get_build_config(build_check_name, build_number, repo_path):
|
||||
if build_check_name == 'ClickHouse build check (actions)':
|
||||
build_config_name = 'build_config'
|
||||
elif build_check_name == 'ClickHouse special build check (actions)':
|
||||
build_config_name = 'special_build_config'
|
||||
else:
|
||||
raise Exception(f"Unknown build check name {build_check_name}")
|
||||
|
||||
ci_config_path = os.path.join(repo_path, "tests/ci/ci_config.json")
|
||||
with open(ci_config_path, 'r') as ci_config:
|
||||
config_dict = json.load(ci_config)
|
||||
return config_dict[build_config_name][build_number]
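For orientation, a hedged sketch of the ci_config.json layout this lookup assumes; the values below are invented, only the two top-level keys and the list indexing come from the code above.
# Illustrative only: a made-up config demonstrating the lookup done by get_build_config.
example_ci_config = {
    "build_config": [
        {"compiler": "clang-13", "package-type": "deb", "build-type": "debug"},
    ],
    "special_build_config": [],
}
# With this content, get_build_config('ClickHouse build check (actions)', 0, repo_path)
# would return example_ci_config["build_config"][0].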
|
||||
|
||||
|
||||
def _can_export_binaries(build_config):
|
||||
if build_config['package-type'] != 'deb':
|
||||
return False
|
||||
if build_config['bundled'] != "bundled":
|
||||
return False
|
||||
if build_config['splitted'] == 'splitted':
|
||||
return False
|
||||
if build_config['sanitizer'] != '':
|
||||
return True
|
||||
if build_config['build-type'] != '':
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def get_packager_cmd(build_config, packager_path, output_path, build_version, image_version, ccache_path):
|
||||
package_type = build_config['package-type']
|
||||
comp = build_config['compiler']
|
||||
cmd = f"cd {packager_path} && ./packager --output-dir={output_path} --package-type={package_type} --compiler={comp}"
|
||||
|
||||
if build_config['build-type']:
|
||||
cmd += ' --build-type={}'.format(build_config['build-type'])
|
||||
if build_config['sanitizer']:
|
||||
cmd += ' --sanitizer={}'.format(build_config['sanitizer'])
|
||||
if build_config['bundled'] == 'unbundled':
|
||||
cmd += ' --unbundled'
|
||||
if build_config['splitted'] == 'splitted':
|
||||
cmd += ' --split-binary'
|
||||
if build_config['tidy'] == 'enable':
|
||||
cmd += ' --clang-tidy'
|
||||
|
||||
cmd += ' --cache=ccache'
|
||||
cmd += ' --ccache_dir={}'.format(ccache_path)
|
||||
|
||||
if 'alien_pkgs' in build_config and build_config['alien_pkgs']:
|
||||
cmd += ' --alien-pkgs'
|
||||
|
||||
cmd += ' --docker-image-version={}'.format(image_version)
|
||||
cmd += ' --version={}'.format(build_version)
|
||||
|
||||
if _can_export_binaries(build_config):
|
||||
cmd += ' --with-binaries=tests'
|
||||
|
||||
return cmd
|
||||
|
||||
def get_image_name(build_config):
|
||||
if build_config['bundled'] != 'bundled':
|
||||
return 'clickhouse/unbundled-builder'
|
||||
elif build_config['package-type'] != 'deb':
|
||||
return 'clickhouse/binary-builder'
|
||||
else:
|
||||
return 'clickhouse/deb-builder'
|
||||
|
||||
|
||||
def build_clickhouse(packager_cmd, logs_path):
|
||||
build_log_path = os.path.join(logs_path, 'build_log.log')
|
||||
with open(build_log_path, 'w') as log_file:
|
||||
retcode = subprocess.Popen(packager_cmd, shell=True, stderr=log_file, stdout=log_file).wait()
|
||||
if retcode == 0:
|
||||
logging.info("Built successfully")
|
||||
else:
|
||||
logging.info("Build failed")
|
||||
return build_log_path, retcode == 0
|
||||
|
||||
def build_config_to_string(build_config):
|
||||
if build_config["package-type"] == "performance":
|
||||
return "performance"
|
||||
|
||||
return "_".join([
|
||||
build_config['compiler'],
|
||||
build_config['build-type'] if build_config['build-type'] else "relwithdebuginfo",
|
||||
build_config['sanitizer'] if build_config['sanitizer'] else "none",
|
||||
build_config['bundled'],
|
||||
build_config['splitted'],
|
||||
"tidy" if build_config['tidy'] == "enable" else "notidy",
|
||||
"with_coverage" if build_config['with_coverage'] else "without_coverage",
|
||||
build_config['package-type'],
|
||||
])
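As a quick illustration of the naming scheme above (the config values are hypothetical, not read from ci_config.json):
# Hypothetical build config, used only to show the short-name format.
example_config = {
    'compiler': 'clang-13', 'build-type': 'debug', 'sanitizer': '',
    'bundled': 'bundled', 'splitted': 'unsplitted', 'tidy': 'disable',
    'with_coverage': False, 'package-type': 'deb',
}
# build_config_to_string(example_config)
# -> 'clang-13_debug_none_bundled_unsplitted_notidy_without_coverage_deb'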
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
repo_path = os.getenv("REPO_COPY", os.path.abspath("../../"))
|
||||
temp_path = os.getenv("TEMP_PATH", os.path.abspath("."))
|
||||
caches_path = os.getenv("CACHES_PATH", temp_path)
|
||||
|
||||
build_check_name = sys.argv[1]
|
||||
build_number = int(sys.argv[2])
|
||||
|
||||
build_config = get_build_config(build_check_name, build_number, repo_path)
|
||||
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file:
|
||||
event = json.load(event_file)
|
||||
|
||||
pr_info = PRInfo(event)
|
||||
|
||||
logging.info("Repo copy path %s", repo_path)
|
||||
|
||||
gh = Github(get_best_robot_token())
|
||||
|
||||
images_path = os.path.join(temp_path, 'changed_images.json')
|
||||
image_name = get_image_name(build_config)
|
||||
image_version = 'latest'
|
||||
if os.path.exists(images_path):
|
||||
logging.info("Images file exists")
|
||||
with open(images_path, 'r') as images_fd:
|
||||
images = json.load(images_fd)
|
||||
logging.info("Got images %s", images)
|
||||
if image_name in images:
|
||||
image_version = images[image_name]
|
||||
|
||||
for i in range(10):
|
||||
try:
|
||||
logging.info(f"Pulling image {image_name}:{image_version}")
|
||||
subprocess.check_output(f"docker pull {image_name}:{image_version}", stderr=subprocess.STDOUT, shell=True)
|
||||
break
|
||||
except Exception as ex:
|
||||
time.sleep(i * 3)
|
||||
logging.info("Got execption pulling docker %s", ex)
|
||||
else:
|
||||
raise Exception(f"Cannot pull dockerhub for image docker pull {image_name}:{image_version}")
|
||||
|
||||
version = get_version_from_repo(repo_path)
|
||||
version = version.tweak_update()
|
||||
update_version_local(repo_path, pr_info.sha, version)
|
||||
|
||||
build_name = build_config_to_string(build_config)
|
||||
logging.info(f"Build short name {build_name}")
|
||||
subprocess.check_call(f"echo 'BUILD_NAME=build_urls_{build_name}' >> $GITHUB_ENV", shell=True)
|
||||
|
||||
build_output_path = os.path.join(temp_path, build_name)
|
||||
if not os.path.exists(build_output_path):
|
||||
os.makedirs(build_output_path)
|
||||
|
||||
ccache_path = os.path.join(caches_path, build_name + '_ccache')
|
||||
if not os.path.exists(ccache_path):
|
||||
os.makedirs(ccache_path)
|
||||
|
||||
packager_cmd = get_packager_cmd(build_config, os.path.join(repo_path, "docker/packager"), build_output_path, version.get_version_string(), image_version, ccache_path)
|
||||
logging.info("Going to run packager with %s", packager_cmd)
|
||||
|
||||
build_clickhouse_log = os.path.join(temp_path, "build_log")
|
||||
if not os.path.exists(build_clickhouse_log):
|
||||
os.makedirs(build_clickhouse_log)
|
||||
|
||||
start = time.time()
|
||||
log_path, success = build_clickhouse(packager_cmd, build_clickhouse_log)
|
||||
elapsed = int(time.time() - start)
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {build_output_path}", shell=True)
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {ccache_path}", shell=True)
|
||||
logging.info("Build finished with %s, log path %s", success, log_path)
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + build_check_name.lower().replace(' ', '_') + "/" + build_name
|
||||
if os.path.exists(log_path):
|
||||
log_url = s3_helper.upload_build_file_to_s3(log_path, s3_path_prefix + "/" + os.path.basename(log_path))
|
||||
logging.info("Log url %s", log_url)
|
||||
else:
|
||||
logging.info("Build log doesn't exist")
|
||||
|
||||
build_urls = s3_helper.upload_build_folder_to_s3(build_output_path, s3_path_prefix, keep_dirs_in_s3_path=False, upload_symlinks=False)
|
||||
logging.info("Got build URLs %s", build_urls)
|
||||
|
||||
print("::notice ::Build URLs: {}".format('\n'.join(build_urls)))
|
||||
|
||||
result = {
|
||||
"log_url": log_url,
|
||||
"build_urls": build_urls,
|
||||
"build_config": build_config,
|
||||
"elapsed_seconds": elapsed,
|
||||
"status": success,
|
||||
}
|
||||
|
||||
print("::notice ::Log URL: {}".format(log_url))
|
||||
|
||||
with open(os.path.join(temp_path, "build_urls_" + build_name + '.json'), 'w') as build_links:
|
||||
json.dump(result, build_links)
|
164
tests/ci/build_report_check.py
Normal file
@ -0,0 +1,164 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
from github import Github
|
||||
from report import create_build_html_report
|
||||
from s3_helper import S3Helper
|
||||
import logging
|
||||
import os
|
||||
from get_robot_token import get_best_robot_token
|
||||
import sys
|
||||
from pr_info import PRInfo
|
||||
|
||||
class BuildResult(object):
|
||||
def __init__(self, compiler, build_type, sanitizer, bundled, splitted, status, elapsed_seconds, with_coverage):
|
||||
self.compiler = compiler
|
||||
self.build_type = build_type
|
||||
self.sanitizer = sanitizer
|
||||
self.bundled = bundled
|
||||
self.splitted = splitted
|
||||
self.status = status
|
||||
self.elapsed_seconds = elapsed_seconds
|
||||
self.with_coverage = with_coverage
|
||||
|
||||
def group_by_artifacts(build_urls):
|
||||
groups = {'deb': [], 'binary': [], 'tgz': [], 'rpm': [], 'performance': []}
|
||||
for url in build_urls:
|
||||
if url.endswith('performance.tgz'):
|
||||
groups['performance'].append(url)
|
||||
elif url.endswith('.deb') or url.endswith('.buildinfo') or url.endswith('.changes') or url.endswith('.tar.gz'):
|
||||
groups['deb'].append(url)
|
||||
elif url.endswith('.rpm'):
|
||||
groups['rpm'].append(url)
|
||||
elif url.endswith('.tgz'):
|
||||
groups['tgz'].append(url)
|
||||
else:
|
||||
groups['binary'].append(url)
|
||||
return groups
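A small usage sketch of how artifacts get bucketed; the URLs are invented and only meant to exercise the extension rules above.
# Invented URLs, shown only to illustrate the grouping rules.
example_urls = [
    "https://example.com/builds/clickhouse-common-static_21.11.1_amd64.deb",
    "https://example.com/builds/clickhouse-server-21.11.1.tgz",
    "https://example.com/builds/clickhouse",
]
# group_by_artifacts(example_urls)
# -> {'deb': [deb URL], 'binary': [bare binary URL], 'tgz': [tgz URL], 'rpm': [], 'performance': []}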
|
||||
|
||||
def get_commit(gh, commit_sha):
|
||||
repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse"))
|
||||
commit = repo.get_commit(commit_sha)
|
||||
return commit
|
||||
|
||||
def process_report(build_report):
|
||||
build_config = build_report['build_config']
|
||||
build_result = BuildResult(
|
||||
compiler=build_config['compiler'],
|
||||
build_type=build_config['build-type'],
|
||||
sanitizer=build_config['sanitizer'],
|
||||
bundled=build_config['bundled'],
|
||||
splitted=build_config['splitted'],
|
||||
status="success" if build_report['status'] else "failure",
|
||||
elapsed_seconds=build_report['elapsed_seconds'],
|
||||
with_coverage=False
|
||||
)
|
||||
build_results = []
|
||||
build_urls = []
|
||||
build_logs_urls = []
|
||||
urls_groups = group_by_artifacts(build_report['build_urls'])
|
||||
found_group = False
|
||||
for _, group_urls in urls_groups.items():
|
||||
if group_urls:
|
||||
build_results.append(build_result)
|
||||
build_urls.append(group_urls)
|
||||
build_logs_urls.append(build_report['log_url'])
|
||||
found_group = True
|
||||
|
||||
if not found_group:
|
||||
build_results.append(build_result)
|
||||
build_urls.append([""])
|
||||
build_logs_urls.append(build_report['log_url'])
|
||||
|
||||
return build_results, build_urls, build_logs_urls
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
reports_path = os.getenv("REPORTS_PATH", "./reports")
|
||||
temp_path = os.path.join(os.getenv("TEMP_PATH", "."))
|
||||
logging.info("Reports path %s", reports_path)
|
||||
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
build_check_name = sys.argv[1]
|
||||
|
||||
build_reports = []
|
||||
for root, dirs, files in os.walk(reports_path):
|
||||
print(files)
|
||||
for f in files:
|
||||
if f.startswith("build_urls_") and f.endswith('.json'):
|
||||
logging.info("Found build report json %s", f)
|
||||
with open(os.path.join(root, f), 'r') as file_handler:
|
||||
build_report = json.load(file_handler)
|
||||
build_reports.append(build_report)
|
||||
|
||||
|
||||
build_results = []
|
||||
build_artifacts = []
|
||||
build_logs = []
|
||||
|
||||
for build_report in build_reports:
|
||||
build_result, build_artifacts_url, build_logs_url = process_report(build_report)
|
||||
logging.info("Got %s result for report", len(build_result))
|
||||
build_results += build_result
|
||||
build_artifacts += build_artifacts_url
|
||||
build_logs += build_logs_url
|
||||
|
||||
logging.info("Totally got %s results", len(build_results))
|
||||
|
||||
gh = Github(get_best_robot_token())
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file:
|
||||
event = json.load(event_file)
|
||||
|
||||
pr_info = PRInfo(event)
|
||||
|
||||
branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master"
|
||||
branch_name = "master"
|
||||
if pr_info.number != 0:
|
||||
branch_name = "PR #{}".format(pr_info.number)
|
||||
branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_info.number)
|
||||
commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{pr_info.sha}"
|
||||
task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID', 0)}"
|
||||
report = create_build_html_report(
|
||||
build_check_name,
|
||||
build_results,
|
||||
build_logs,
|
||||
build_artifacts,
|
||||
task_url,
|
||||
branch_url,
|
||||
branch_name,
|
||||
commit_url
|
||||
)
|
||||
|
||||
report_path = os.path.join(temp_path, 'report.html')
|
||||
with open(report_path, 'w') as f:
|
||||
f.write(report)
|
||||
|
||||
logging.info("Going to upload prepared report")
|
||||
context_name_for_path = build_check_name.lower().replace(' ', '_')
|
||||
s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path
|
||||
|
||||
url = s3_helper.upload_build_file_to_s3(report_path, s3_path_prefix + "/report.html")
|
||||
logging.info("Report url %s", url)
|
||||
|
||||
total_builds = len(build_results)
|
||||
ok_builds = 0
|
||||
summary_status = "success"
|
||||
for build_result in build_results:
|
||||
if build_result.status == "failure" and summary_status != "error":
|
||||
summary_status = "failure"
|
||||
if build_result.status == "error" or not build_result.status:
|
||||
summary_status = "error"
|
||||
|
||||
if build_result.status == "success":
|
||||
ok_builds += 1
|
||||
|
||||
description = "{}/{} builds are OK".format(ok_builds, total_builds)
|
||||
|
||||
print("::notice ::Report url: {}".format(url))
|
||||
|
||||
commit = get_commit(gh, pr_info.sha)
|
||||
commit.create_status(context=build_check_name, description=description, state=summary_status, target_url=url)
|
195
tests/ci/fast_test_check.py
Normal file
@ -0,0 +1,195 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
from pr_info import PRInfo
|
||||
from report import create_test_html_report
|
||||
from s3_helper import S3Helper
|
||||
from github import Github
|
||||
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
|
||||
import csv
|
||||
|
||||
NAME = 'Fast test (actions)'
|
||||
|
||||
def get_fasttest_cmd(workspace, output_path, ccache_path, repo_path, pr_number, commit_sha, image):
|
||||
return f"docker run --cap-add=SYS_PTRACE " \
|
||||
f"-e FASTTEST_WORKSPACE=/fasttest-workspace -e FASTTEST_OUTPUT=/test_output " \
|
||||
f"-e FASTTEST_SOURCE=/ClickHouse --cap-add=SYS_PTRACE " \
|
||||
f"-e PULL_REQUEST_NUMBER={pr_number} -e COMMIT_SHA={commit_sha} -e COPY_CLICKHOUSE_BINARY_TO_OUTPUT=1 " \
|
||||
f"--volume={workspace}:/fasttest-workspace --volume={repo_path}:/ClickHouse --volume={output_path}:/test_output "\
|
||||
f"--volume={ccache_path}:/fasttest-workspace/ccache {image}"
|
||||
|
||||
|
||||
def process_results(result_folder):
|
||||
test_results = []
|
||||
additional_files = []
|
||||
# Just upload all files from result_folder.
|
||||
# If task provides processed results, then it's responsible for content of result_folder.
|
||||
if os.path.exists(result_folder):
|
||||
test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))]
|
||||
additional_files = [os.path.join(result_folder, f) for f in test_files]
|
||||
|
||||
status_path = os.path.join(result_folder, "check_status.tsv")
|
||||
logging.info("Found test_results.tsv")
|
||||
status = list(csv.reader(open(status_path, 'r'), delimiter='\t'))
|
||||
if len(status) != 1 or len(status[0]) != 2:
|
||||
return "error", "Invalid check_status.tsv", test_results, additional_files
|
||||
state, description = status[0][0], status[0][1]
|
||||
|
||||
results_path = os.path.join(result_folder, "test_results.tsv")
|
||||
test_results = list(csv.reader(open(results_path, 'r'), delimiter='\t'))
|
||||
if len(test_results) == 0:
|
||||
raise Exception("Empty results")
|
||||
|
||||
return state, description, test_results, additional_files
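To make the expected report layout concrete, a minimal parsing sketch with made-up file contents; the real files are produced inside the fasttest container.
import csv, io

# Hypothetical check_status.tsv content: a single "state<TAB>description" row.
status = list(csv.reader(io.StringIO("success\tAll checks passed\n"), delimiter='\t'))
state, description = status[0][0], status[0][1]

# Hypothetical test_results.tsv content: one "test_name<TAB>status" row per test.
tests = list(csv.reader(io.StringIO("00001_select_1\tOK\n"), delimiter='\t'))
# state == 'success', description == 'All checks passed', tests == [['00001_select_1', 'OK']]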
|
||||
|
||||
|
||||
def process_logs(s3_client, additional_logs, s3_path_prefix):
|
||||
additional_urls = []
|
||||
for log_path in additional_logs:
|
||||
if log_path:
|
||||
additional_urls.append(
|
||||
s3_client.upload_test_report_to_s3(
|
||||
log_path,
|
||||
s3_path_prefix + "/" + os.path.basename(log_path)))
|
||||
|
||||
return additional_urls
|
||||
|
||||
|
||||
def upload_results(s3_client, pr_number, commit_sha, test_results, raw_log, additional_files):
|
||||
additional_files = [raw_log] + additional_files
|
||||
s3_path_prefix = f"{pr_number}/{commit_sha}/fasttest"
|
||||
additional_urls = process_logs(s3_client, additional_files, s3_path_prefix)
|
||||
|
||||
branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master"
|
||||
branch_name = "master"
|
||||
if pr_number != 0:
|
||||
branch_name = "PR #{}".format(pr_number)
|
||||
branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number)
|
||||
commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}"
|
||||
|
||||
task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}"
|
||||
|
||||
raw_log_url = additional_urls[0]
|
||||
additional_urls.pop(0)
|
||||
|
||||
html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls, True)
|
||||
with open('report.html', 'w') as f:
|
||||
f.write(html_report)
|
||||
|
||||
url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html")
|
||||
logging.info("Search result in url %s", url)
|
||||
return url
|
||||
|
||||
def get_commit(gh, commit_sha):
|
||||
repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse"))
|
||||
commit = repo.get_commit(commit_sha)
|
||||
return commit
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
temp_path = os.getenv("TEMP_PATH", os.path.abspath("."))
|
||||
caches_path = os.getenv("CACHES_PATH", temp_path)
|
||||
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file:
|
||||
event = json.load(event_file)
|
||||
|
||||
pr_info = PRInfo(event)
|
||||
|
||||
gh = Github(get_best_robot_token())
|
||||
|
||||
images_path = os.path.join(temp_path, 'changed_images.json')
|
||||
docker_image = 'clickhouse/fasttest'
|
||||
if os.path.exists(images_path):
|
||||
logging.info("Images file exists")
|
||||
with open(images_path, 'r') as images_fd:
|
||||
images = json.load(images_fd)
|
||||
logging.info("Got images %s", images)
|
||||
if 'clickhouse/fasttest' in images:
|
||||
docker_image += ':' + images['clickhouse/fasttest']
|
||||
|
||||
logging.info("Got docker image %s", docker_image)
|
||||
for i in range(10):
|
||||
try:
|
||||
subprocess.check_output(f"docker pull {docker_image}", shell=True)
|
||||
break
|
||||
except Exception as ex:
|
||||
time.sleep(i * 3)
|
||||
logging.info("Got execption pulling docker %s", ex)
|
||||
else:
|
||||
raise Exception(f"Cannot pull dockerhub for image {docker_image}")
|
||||
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
|
||||
workspace = os.path.join(temp_path, "fasttest-workspace")
|
||||
if not os.path.exists(workspace):
|
||||
os.makedirs(workspace)
|
||||
|
||||
output_path = os.path.join(temp_path, "fasttest-output")
|
||||
if not os.path.exists(output_path):
|
||||
os.makedirs(output_path)
|
||||
|
||||
cache_path = os.path.join(caches_path, "fasttest")
|
||||
if not os.path.exists(cache_path):
|
||||
os.makedirs(cache_path)
|
||||
|
||||
repo_path = os.path.join(temp_path, "fasttest-repo")
|
||||
if not os.path.exists(repo_path):
|
||||
os.makedirs(repo_path)
|
||||
|
||||
run_cmd = get_fasttest_cmd(workspace, output_path, cache_path, repo_path, pr_info.number, pr_info.sha, docker_image)
|
||||
logging.info("Going to run fasttest with cmd %s", run_cmd)
|
||||
|
||||
logs_path = os.path.join(temp_path, "fasttest-logs")
|
||||
if not os.path.exists(logs_path):
|
||||
os.makedirs(logs_path)
|
||||
|
||||
run_log_path = os.path.join(logs_path, 'runlog.log')
|
||||
with open(run_log_path, 'w') as log:
|
||||
retcode = subprocess.Popen(run_cmd, shell=True, stderr=log, stdout=log).wait()
|
||||
if retcode == 0:
|
||||
logging.info("Run successfully")
|
||||
else:
|
||||
logging.info("Run failed")
|
||||
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {cache_path}", shell=True)
|
||||
|
||||
test_output_files = os.listdir(output_path)
|
||||
additional_logs = []
|
||||
for f in test_output_files:
|
||||
additional_logs.append(os.path.join(output_path, f))
|
||||
|
||||
test_log_exists = 'test_log.txt' in test_output_files or 'test_result.txt' in test_output_files
|
||||
test_result_exists = 'test_results.tsv' in test_output_files
|
||||
test_results = []
|
||||
if 'submodule_log.txt' not in test_output_files:
|
||||
description = "Cannot clone repository"
|
||||
state = "failure"
|
||||
elif 'cmake_log.txt' not in test_output_files:
|
||||
description = "Cannot fetch submodules"
|
||||
state = "failure"
|
||||
elif 'build_log.txt' not in test_output_files:
|
||||
description = "Cannot finish cmake"
|
||||
state = "failure"
|
||||
elif 'install_log.txt' not in test_output_files:
|
||||
description = "Cannot build ClickHouse"
|
||||
state = "failure"
|
||||
elif not test_log_exists and not test_result_exists:
|
||||
description = "Cannot install or start ClickHouse"
|
||||
state = "failure"
|
||||
else:
|
||||
state, description, test_results, additional_logs = process_results(output_path)
|
||||
|
||||
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, run_log_path, additional_logs)
|
||||
print("::notice ::Report url: {}".format(report_url))
|
||||
commit = get_commit(gh, pr_info.sha)
|
||||
commit.create_status(context=NAME, description=description, state=state, target_url=report_url)
|
@ -67,6 +67,24 @@ def list_runners(access_token):
|
||||
result.append(desc)
|
||||
return result
|
||||
|
||||
def group_runners_by_tag(listed_runners):
|
||||
result = {}
|
||||
|
||||
RUNNER_TYPE_LABELS = ['style-checker', 'builder']
|
||||
for runner in listed_runners:
|
||||
for tag in runner.tags:
|
||||
if tag in RUNNER_TYPE_LABELS:
|
||||
if tag not in result:
|
||||
result[tag] = []
|
||||
result[tag].append(runner)
|
||||
break
|
||||
else:
|
||||
if 'unlabeled' not in result:
|
||||
result['unlabeled'] = []
|
||||
result['unlabeled'].append(runner)
|
||||
return result
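A hedged example of the grouping behaviour; the Runner stand-in below is invented, the real descriptions come from list_runners().
from collections import namedtuple

# Invented stand-in for the runner descriptions returned by list_runners().
Runner = namedtuple('Runner', ['name', 'tags'])
runners = [
    Runner('runner-1', ['self-hosted', 'Linux', 'X64', 'builder']),
    Runner('runner-2', ['self-hosted', 'Linux', 'X64', 'style-checker']),
    Runner('runner-3', ['self-hosted', 'Linux', 'X64']),
]
# group_runners_by_tag(runners)
# -> {'builder': [runner-1], 'style-checker': [runner-2], 'unlabeled': [runner-3]}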
|
||||
|
||||
|
||||
def push_metrics_to_cloudwatch(listed_runners, namespace):
|
||||
import boto3
|
||||
client = boto3.client('cloudwatch')
|
||||
@ -100,7 +118,7 @@ def push_metrics_to_cloudwatch(listed_runners, namespace):
|
||||
'Unit': 'Percent',
|
||||
})
|
||||
|
||||
client.put_metric_data(Namespace='RunnersMetrics', MetricData=metrics_data)
|
||||
client.put_metric_data(Namespace=namespace, MetricData=metrics_data)
|
||||
|
||||
def main(github_secret_key, github_app_id, push_to_cloudwatch):
|
||||
payload = {
|
||||
@ -113,10 +131,12 @@ def main(github_secret_key, github_app_id, push_to_cloudwatch):
|
||||
installation_id = get_installation_id(encoded_jwt)
|
||||
access_token = get_access_token(encoded_jwt, installation_id)
|
||||
runners = list_runners(access_token)
|
||||
if push_to_cloudwatch:
|
||||
push_metrics_to_cloudwatch(runners, 'RunnersMetrics')
|
||||
else:
|
||||
print(runners)
|
||||
grouped_runners = group_runners_by_tag(runners)
|
||||
for group, group_runners in grouped_runners.items():
|
||||
if push_to_cloudwatch:
|
||||
push_metrics_to_cloudwatch(group_runners, 'RunnersMetrics/' + group)
|
||||
else:
|
||||
print(group, group_runners)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -39,3 +39,9 @@ class PRInfo:
|
||||
'user_login': self.user_login,
|
||||
'user_orgs': self.user_orgs,
|
||||
}
|
||||
|
||||
|
||||
class FakePRInfo:
|
||||
def __init__(self):
|
||||
self.number = 11111
|
||||
self.sha = "xxxxxxxxxxxxxxxxxx"
|
||||
|
@ -9,7 +9,7 @@ from s3_helper import S3Helper
|
||||
from pr_info import PRInfo
|
||||
import shutil
|
||||
import sys
|
||||
from get_robot_token import get_best_robot_token
|
||||
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
|
||||
|
||||
NAME = 'PVS Studio (actions)'
|
||||
LICENCE_NAME = 'Free license: ClickHouse, Yandex'
|
||||
@ -97,7 +97,7 @@ if __name__ == "__main__":
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
|
||||
licence_key = os.getenv('PVS_STUDIO_KEY')
|
||||
licence_key = get_parameter_from_ssm('pvs_studio_key')
|
||||
cmd = f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}"
|
||||
commit = get_commit(gh, pr_info.sha)
|
||||
|
||||
|
@ -32,6 +32,9 @@ table {{ border: 0; }}
|
||||
.main {{ margin-left: 10%; }}
|
||||
p.links a {{ padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-space: nowrap; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }}
|
||||
th {{ cursor: pointer; }}
|
||||
.failed {{ cursor: pointer; }}
|
||||
.failed-content.open {{}}
|
||||
.failed-content {{ display: none; }}
|
||||
|
||||
</style>
|
||||
<title>{title}</title>
|
||||
@ -51,7 +54,13 @@ th {{ cursor: pointer; }}
|
||||
<script type="text/javascript">
|
||||
/// Straight from https://stackoverflow.com/questions/14267781/sorting-html-table-with-javascript
|
||||
|
||||
const getCellValue = (tr, idx) => tr.children[idx].innerText || tr.children[idx].textContent;
|
||||
const getCellValue = (tr, idx) => {{
|
||||
var classes = tr.classList;
|
||||
var elem = tr;
|
||||
if (classes.contains("failed-content") || classes.contains("failed-content.open"))
|
||||
elem = tr.previousElementSibling;
|
||||
return elem.children[idx].innerText || elem.children[idx].textContent;
|
||||
}}
|
||||
|
||||
const comparer = (idx, asc) => (a, b) => ((v1, v2) =>
|
||||
v1 !== '' && v2 !== '' && !isNaN(v1) && !isNaN(v2) ? v1 - v2 : v1.toString().localeCompare(v2)
|
||||
@ -64,6 +73,12 @@ th {{ cursor: pointer; }}
|
||||
.sort(comparer(Array.from(th.parentNode.children).indexOf(th), this.asc = !this.asc))
|
||||
.forEach(tr => table.appendChild(tr) );
|
||||
}})));
|
||||
|
||||
Array.from(document.getElementsByClassName("failed")).forEach(tr => tr.addEventListener('click', function() {{
|
||||
var content = this.nextElementSibling;
|
||||
content.classList.toggle("failed-content.open");
|
||||
content.classList.toggle("failed-content");
|
||||
}}));
|
||||
</script>
|
||||
</html>
|
||||
"""
|
||||
@ -107,13 +122,13 @@ def _get_status_style(status):
|
||||
|
||||
def _get_html_url(url):
|
||||
if isinstance(url, str):
|
||||
return '<a href="{url}">{name}</a>'.format(url=url, name=os.path.basename(url))
|
||||
return '<a href="{url}">{name}</a>'.format(url=url, name=os.path.basename(url).replace('%2B', '+').replace('%20', ' '))
|
||||
if isinstance(url, tuple):
|
||||
return '<a href="{url}">{name}</a>'.format(url=url[0], name=url[1])
|
||||
return '<a href="{url}">{name}</a>'.format(url=url[0], name=url[1].replace('%2B', '+').replace('%20', ' '))
|
||||
return ''
|
||||
|
||||
|
||||
def create_test_html_report(header, test_result, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls=[]):
|
||||
def create_test_html_report(header, test_result, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls=[], with_raw_logs=False):
|
||||
if test_result:
|
||||
rows_part = ""
|
||||
num_fails = 0
|
||||
@ -134,11 +149,13 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
|
||||
has_test_logs = True
|
||||
|
||||
row = "<tr>"
|
||||
is_fail = test_status == "FAIL" or test_status == 'FLAKY'
|
||||
if is_fail and with_raw_logs and test_logs is not None:
|
||||
row = "<tr class=\"failed\">"
|
||||
row += "<td>" + test_name + "</td>"
|
||||
style = _get_status_style(test_status)
|
||||
|
||||
# Allow to quickly scroll to the first failure.
|
||||
is_fail = test_status == "FAIL" or test_status == 'FLAKY'
|
||||
is_fail_id = ""
|
||||
if is_fail:
|
||||
num_fails = num_fails + 1
|
||||
@ -149,17 +166,23 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
|
||||
if test_time is not None:
|
||||
row += "<td>" + test_time + "</td>"
|
||||
|
||||
if test_logs is not None:
|
||||
if test_logs is not None and not with_raw_logs:
|
||||
test_logs_html = "<br>".join([_get_html_url(url) for url in test_logs])
|
||||
row += "<td>" + test_logs_html + "</td>"
|
||||
|
||||
row += "</tr>"
|
||||
rows_part += row
|
||||
if test_logs is not None and with_raw_logs:
|
||||
row = "<tr class=\"failed-content\">"
|
||||
# TODO: compute colspan too
|
||||
row += "<td colspan=\"3\"><pre>" + test_logs + "</pre></td>"
|
||||
row += "</tr>"
|
||||
rows_part += row
|
||||
|
||||
headers = BASE_HEADERS
|
||||
if has_test_time:
|
||||
headers.append('Test time, sec.')
|
||||
if has_test_logs:
|
||||
if has_test_logs and not with_raw_logs:
|
||||
headers.append('Logs')
|
||||
|
||||
headers = ''.join(['<th>' + h + '</th>' for h in headers])
|
||||
@ -235,7 +258,7 @@ tr:hover td {{filter: brightness(95%);}}
|
||||
</table>
|
||||
<p class="links">
|
||||
<a href="{commit_url}">Commit</a>
|
||||
<a href="{task_url}">Task (private network)</a>
|
||||
<a href="{task_url}">Task (github actions)</a>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
@ -281,7 +304,7 @@ def create_build_html_report(header, build_results, build_logs_urls, artifact_ur
|
||||
link_separator = "<br/>"
|
||||
if artifact_urls:
|
||||
for artifact_url in artifact_urls:
|
||||
links += LINK_TEMPLATE.format(text=os.path.basename(artifact_url), url=artifact_url)
|
||||
links += LINK_TEMPLATE.format(text=os.path.basename(artifact_url.replace('%2B', '+').replace('%20', ' ')), url=artifact_url)
|
||||
links += link_separator
|
||||
if links:
|
||||
links = links[:-len(link_separator)]
|
||||
|
@ -122,5 +122,9 @@ if __name__ == "__main__":
|
||||
commit.create_status(context=NAME, description=description, state="failure", target_url=url)
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("::notice ::Can run")
|
||||
commit.create_status(context=NAME, description=description, state="pending", target_url=url)
|
||||
if 'pr-documentation' in pr_info.labels or 'pr-doc-fix' in pr_info.labels:
|
||||
commit.create_status(context=NAME, description="Skipping checks for documentation", state="success", target_url=url)
|
||||
print("::notice ::Can run, but it's documentation PR, skipping")
|
||||
else:
|
||||
print("::notice ::Can run")
|
||||
commit.create_status(context=NAME, description=description, state="pending", target_url=url)
|
||||
|
@ -56,7 +56,8 @@ class S3Helper(object):
|
||||
|
||||
self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata)
|
||||
logging.info("Upload {} to {}. Meta: {}".format(file_path, s3_path, metadata))
|
||||
return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path)
|
||||
# last two replacements are specifics of AWS urls: https://jamesd3142.wordpress.com/2018/02/28/amazon-s3-and-the-plus-symbol/
|
||||
return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path).replace('+', '%2B').replace(' ', '%20')
|
||||
|
||||
def upload_test_report_to_s3(self, file_path, s3_path):
|
||||
return self._upload_file_to_s3('clickhouse-test-reports', file_path, s3_path)
|
||||
|
@ -85,15 +85,6 @@ def get_commit(gh, commit_sha):
|
||||
commit = repo.get_commit(commit_sha)
|
||||
return commit
|
||||
|
||||
def update_check_with_curl(check_id):
|
||||
cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} "
|
||||
"--header 'authorization: Bearer {}' "
|
||||
"--header 'Accept: application/vnd.github.v3+json' "
|
||||
"--header 'content-type: application/json' "
|
||||
"-d '{{\"name\" : \"hello-world-name\"}}'")
|
||||
cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN"))
|
||||
subprocess.check_call(cmd, shell=True)
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")))
|
||||
|
139
tests/ci/version_helper.py
Normal file
@ -0,0 +1,139 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import subprocess
|
||||
import datetime
|
||||
|
||||
FILE_WITH_VERSION_PATH = "cmake/autogenerated_versions.txt"
|
||||
CHANGELOG_IN_PATH = "debian/changelog.in"
|
||||
CHANGELOG_PATH = "debian/changelog"
|
||||
CONTRIBUTORS_SCRIPT_DIR = "src/Storages/System/"
|
||||
|
||||
|
||||
class ClickHouseVersion(object):
|
||||
def __init__(self, major, minor, patch, tweak, revision):
|
||||
self.major = major
|
||||
self.minor = minor
|
||||
self.patch = patch
|
||||
self.tweak = tweak
|
||||
self.revision = revision
|
||||
|
||||
def minor_update(self):
|
||||
return ClickHouseVersion(
|
||||
self.major,
|
||||
self.minor + 1,
|
||||
1,
|
||||
1,
|
||||
self.revision + 1)
|
||||
|
||||
def patch_update(self):
|
||||
return ClickHouseVersion(
|
||||
self.major,
|
||||
self.minor,
|
||||
self.patch + 1,
|
||||
1,
|
||||
self.revision)
|
||||
|
||||
def tweak_update(self):
|
||||
return ClickHouseVersion(
|
||||
self.major,
|
||||
self.minor,
|
||||
self.patch,
|
||||
self.tweak + 1,
|
||||
self.revision)
|
||||
|
||||
def get_version_string(self):
|
||||
return '.'.join([
|
||||
str(self.major),
|
||||
str(self.minor),
|
||||
str(self.patch),
|
||||
str(self.tweak)
|
||||
])
|
||||
|
||||
def as_tuple(self):
|
||||
return (self.major, self.minor, self.patch, self.tweak)
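Worth noting: the *_update methods return a new ClickHouseVersion rather than mutating in place, so callers need to reassign the result. A minimal sketch with made-up version numbers:
# The *_update helpers are pure: reassignment is required to pick up the bump.
v = ClickHouseVersion(21, 11, 1, 1, 54456)
v = v.tweak_update()
# v.get_version_string() -> '21.11.1.2'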
|
||||
|
||||
|
||||
class VersionType(object):
|
||||
STABLE = "stable"
|
||||
TESTING = "testing"
|
||||
|
||||
|
||||
def build_version_description(version, version_type):
|
||||
return "v" + version.get_version_string() + "-" + version_type
|
||||
|
||||
|
||||
def _get_version_from_line(line):
|
||||
_, ver_with_bracket = line.strip().split(' ')
|
||||
return ver_with_bracket[:-1]
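A quick sanity check of the assumed line format from cmake/autogenerated_versions.txt; the concrete number is illustrative.
# The version file is assumed to contain lines like "SET(VERSION_MAJOR 21)".
assert _get_version_from_line("SET(VERSION_MAJOR 21)") == "21"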
|
||||
|
||||
|
||||
def get_version_from_repo(repo_path):
|
||||
path_to_file = os.path.join(repo_path, FILE_WITH_VERSION_PATH)
|
||||
major = 0
|
||||
minor = 0
|
||||
patch = 0
|
||||
tweak = 0
|
||||
version_revision = 0
|
||||
with open(path_to_file, 'r') as ver_file:
|
||||
for line in ver_file:
|
||||
if "VERSION_MAJOR" in line and "math" not in line and "SET" in line:
|
||||
major = _get_version_from_line(line)
|
||||
elif "VERSION_MINOR" in line and "math" not in line and "SET" in line:
|
||||
minor = _get_version_from_line(line)
|
||||
elif "VERSION_PATCH" in line and "math" not in line and "SET" in line:
|
||||
patch = _get_version_from_line(line)
|
||||
elif "VERSION_REVISION" in line and "math" not in line:
|
||||
version_revision = _get_version_from_line(line)
|
||||
return ClickHouseVersion(major, minor, patch, tweak, version_revision)
|
||||
|
||||
|
||||
def _update_cmake_version(repo_path, version, sha, version_type):
|
||||
cmd = """sed -i --follow-symlinks -e "s/SET(VERSION_REVISION [^) ]*/SET(VERSION_REVISION {revision}/g;" \
|
||||
-e "s/SET(VERSION_DESCRIBE [^) ]*/SET(VERSION_DESCRIBE {version_desc}/g;" \
|
||||
-e "s/SET(VERSION_GITHASH [^) ]*/SET(VERSION_GITHASH {sha}/g;" \
|
||||
-e "s/SET(VERSION_MAJOR [^) ]*/SET(VERSION_MAJOR {major}/g;" \
|
||||
-e "s/SET(VERSION_MINOR [^) ]*/SET(VERSION_MINOR {minor}/g;" \
|
||||
-e "s/SET(VERSION_PATCH [^) ]*/SET(VERSION_PATCH {patch}/g;" \
|
||||
-e "s/SET(VERSION_STRING [^) ]*/SET(VERSION_STRING {version_string}/g;" \
|
||||
{path}""".format(
|
||||
revision=version.revision,
|
||||
version_desc=build_version_description(version, version_type),
|
||||
sha=sha,
|
||||
major=version.major,
|
||||
minor=version.minor,
|
||||
patch=version.patch,
|
||||
version_string=version.get_version_string(),
|
||||
path=os.path.join(repo_path, FILE_WITH_VERSION_PATH),
|
||||
)
|
||||
subprocess.check_call(cmd, shell=True)
|
||||
|
||||
|
||||
def _update_changelog(repo_path, version):
|
||||
cmd = """sed \
|
||||
-e "s/[@]VERSION_STRING[@]/{version_str}/g" \
|
||||
-e "s/[@]DATE[@]/{date}/g" \
|
||||
-e "s/[@]AUTHOR[@]/clickhouse-release/g" \
|
||||
-e "s/[@]EMAIL[@]/clickhouse-release@yandex-team.ru/g" \
|
||||
< {in_path} > {changelog_path}
|
||||
""".format(
|
||||
version_str=version.get_version_string(),
|
||||
date=datetime.datetime.now().strftime("%a, %d %b %Y %H:%M:%S") + " +0300",
|
||||
in_path=os.path.join(repo_path, CHANGELOG_IN_PATH),
|
||||
changelog_path=os.path.join(repo_path, CHANGELOG_PATH)
|
||||
)
|
||||
subprocess.check_call(cmd, shell=True)
|
||||
|
||||
def _update_contributors(repo_path):
|
||||
cmd = "cd {} && ./StorageSystemContributors.sh".format(os.path.join(repo_path, CONTRIBUTORS_SCRIPT_DIR))
|
||||
subprocess.check_call(cmd, shell=True)
|
||||
|
||||
def _update_dockerfile(repo_path, version):
|
||||
version_str_for_docker = '.'.join([str(version.major), str(version.minor), str(version.patch), '*'])
|
||||
cmd = "ls -1 {path}/docker/*/Dockerfile | xargs sed -i -r -e 's/ARG version=.+$/ARG version='{ver}'/'".format(path=repo_path, ver=version_str_for_docker)
|
||||
subprocess.check_call(cmd, shell=True)
|
||||
|
||||
def update_version_local(repo_path, sha, version, version_type="testing"):
|
||||
_update_contributors(repo_path)
|
||||
_update_cmake_version(repo_path, version, sha, version_type)
|
||||
_update_changelog(repo_path, version)
|
||||
_update_dockerfile(repo_path, version)
|
20
tests/ci/worker/init_builder.sh
Normal file
@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
echo "Running init script"
|
||||
export DEBIAN_FRONTEND=noninteractive
|
||||
export RUNNER_HOME=/home/ubuntu/actions-runner
|
||||
|
||||
echo "Receiving token"
|
||||
export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value`
|
||||
export RUNNER_URL="https://github.com/ClickHouse"
|
||||
# Funny fact: the instance metadata service has a fixed IP
|
||||
export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id`
|
||||
|
||||
cd $RUNNER_HOME
|
||||
|
||||
echo "Going to configure runner"
|
||||
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,builder' --work _work
|
||||
|
||||
echo "Run"
|
||||
sudo -u ubuntu ./run.sh
|
@ -14,7 +14,7 @@ export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id`
|
||||
cd $RUNNER_HOME
|
||||
|
||||
echo "Going to configure runner"
|
||||
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64' --work _work
|
||||
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,style-checker' --work _work
|
||||
|
||||
echo "Run"
|
||||
sudo -u ubuntu ./run.sh
|
@ -27,6 +27,16 @@ apt-get install --yes --no-install-recommends docker-ce docker-ce-cli containerd
|
||||
|
||||
usermod -aG docker ubuntu
|
||||
|
||||
# enable IPv6 in containers (fixed-cidr-v6 is an arbitrary mask from the IPv6 documentation range)
|
||||
cat <<EOT > /etc/docker/daemon.json
|
||||
{
|
||||
"ipv6": true,
|
||||
"fixed-cidr-v6": "2001:db8:1::/64"
|
||||
}
|
||||
EOT
|
||||
|
||||
systemctl restart docker
|
||||
|
||||
pip install boto3 pygithub requests urllib3 unidiff
|
||||
|
||||
mkdir -p $RUNNER_HOME && cd $RUNNER_HOME
|
@ -0,0 +1,61 @@
|
||||
import pytest
|
||||
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
node1 = cluster.add_instance('node1', with_zookeeper=True, image='yandex/clickhouse-server', tag='21.3', with_installed_binary=True)
|
||||
node2 = cluster.add_instance('node2', with_zookeeper=True, image='yandex/clickhouse-server')
|
||||
node3 = cluster.add_instance('node3', with_zookeeper=True, image='yandex/clickhouse-server')
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def start_cluster():
|
||||
try:
|
||||
cluster.start()
|
||||
yield cluster
|
||||
|
||||
finally:
|
||||
cluster.shutdown()
|
||||
|
||||
|
||||
def test_two_level_merge(start_cluster):
|
||||
for node in start_cluster.instances.values():
|
||||
node.query(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS test_two_level_merge(date Date, zone UInt32, number UInt32)
|
||||
ENGINE = MergeTree() PARTITION BY toUInt64(number / 1000) ORDER BY tuple();
|
||||
|
||||
INSERT INTO
|
||||
test_two_level_merge
|
||||
SELECT
|
||||
toDate('2021-09-28') - number / 1000,
|
||||
249081628,
|
||||
number
|
||||
FROM
|
||||
numbers(15000);
|
||||
"""
|
||||
)
|
||||
|
||||
# covers only the keys64 method
|
||||
for node in start_cluster.instances.values():
|
||||
print(node.query(
|
||||
"""
|
||||
SELECT
|
||||
throwIf(uniqExact(date) != count(), 'group by is borked')
|
||||
FROM (
|
||||
SELECT
|
||||
date
|
||||
FROM
|
||||
remote('node{1,2}', default.test_two_level_merge)
|
||||
WHERE
|
||||
date BETWEEN toDate('2021-09-20') AND toDate('2021-09-28')
|
||||
AND zone = 249081628
|
||||
GROUP by date, zone
|
||||
)
|
||||
SETTINGS
|
||||
group_by_two_level_threshold = 1,
|
||||
group_by_two_level_threshold_bytes = 1,
|
||||
max_threads = 2,
|
||||
prefer_localhost_replica = 0
|
||||
"""
|
||||
))
|
@ -103,6 +103,11 @@
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<query>
|
||||
SELECT * FROM simple_key_hashed_array_dictionary
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<query>
|
||||
WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key
|
||||
SELECT dictGet('default.complex_key_hashed_array_dictionary', {column_name}, key)
|
||||
@ -119,6 +124,11 @@
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<query>
|
||||
SELECT * FROM complex_key_hashed_array_dictionary
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS simple_key_hashed_array_dictionary_source_table;</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS complex_key_hashed_array_dictionary_source_table;</drop_query>
|
||||
|
||||
|
@ -103,6 +103,11 @@
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<query>
|
||||
SELECT * FROM simple_key_hashed_dictionary
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<query>
|
||||
WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key
|
||||
SELECT dictGet('default.complex_key_hashed_dictionary', {column_name}, key)
|
||||
@ -119,6 +124,11 @@
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<query>
|
||||
SELECT * FROM complex_key_hashed_dictionary
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS simple_key_hashed_dictionary_source_table;</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS complex_key_hashed_dictionary_source_table;</drop_query>
|
||||
|
||||
|
@ -0,0 +1,3 @@
|
||||
2020-01-01 00:05:00
|
||||
2020-01-01 00:05:00
|
||||
2020-01-01 00:06:00 hello
|
@ -0,0 +1,25 @@
|
||||
DROP TABLE IF EXISTS buf_dest;
|
||||
DROP TABLE IF EXISTS buf;
|
||||
|
||||
CREATE TABLE buf_dest (timestamp DateTime)
|
||||
ENGINE = MergeTree PARTITION BY toYYYYMMDD(timestamp)
|
||||
ORDER BY (timestamp);
|
||||
|
||||
CREATE TABLE buf (timestamp DateTime) Engine = Buffer(currentDatabase(), buf_dest, 16, 0.1, 0.1, 2000000, 20000000, 100000000, 300000000);
|
||||
|
||||
INSERT INTO buf (timestamp) VALUES (toDateTime('2020-01-01 00:05:00'));
|
||||
|
||||
--- wait for buffer to flush
|
||||
SELECT sleep(1) from numbers(1) settings max_block_size=1 format Null;
|
||||
|
||||
ALTER TABLE buf_dest ADD COLUMN s String;
|
||||
ALTER TABLE buf ADD COLUMN s String;
|
||||
|
||||
SELECT * FROM buf;
|
||||
|
||||
INSERT INTO buf (timestamp, s) VALUES (toDateTime('2020-01-01 00:06:00'), 'hello');
|
||||
|
||||
SELECT * FROM buf ORDER BY timestamp;
|
||||
|
||||
DROP TABLE IF EXISTS buf;
|
||||
DROP TABLE IF EXISTS buf_dest;
|
@ -12,7 +12,7 @@ echo "Starting clickhouse-server"
|
||||
|
||||
$PORT
|
||||
|
||||
$CLICKHOUSE_BINARY-server -- --tcp_port "$CLICKHOUSE_PORT_TCP" > server.log 2>&1 &
|
||||
$CLICKHOUSE_BINARY server -- --tcp_port "$CLICKHOUSE_PORT_TCP" --path /tmp/ > server.log 2>&1 &
|
||||
PID=$!
|
||||
|
||||
function finish {
|
||||
|
@ -13,12 +13,25 @@ import urllib.request
|
||||
import subprocess
|
||||
import lzma
|
||||
|
||||
def get_local_port(host):
|
||||
with socket.socket() as fd:
|
||||
|
||||
def is_ipv6(host):
|
||||
try:
|
||||
socket.inet_aton(host)
|
||||
return False
|
||||
except:
|
||||
return True
|
||||
|
||||
def get_local_port(host, ipv6):
|
||||
if ipv6:
|
||||
family = socket.AF_INET6
|
||||
else:
|
||||
family = socket.AF_INET
|
||||
|
||||
with socket.socket(family) as fd:
|
||||
fd.bind((host, 0))
|
||||
return fd.getsockname()[1]
|
||||
|
||||
CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1')
|
||||
CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', 'localhost')
|
||||
CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')
|
||||
|
||||
#####################################################################################
|
||||
@ -30,11 +43,15 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')
|
||||
|
||||
# IP-address of this host accessible from the outside world. Get the first one
|
||||
HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0]
|
||||
HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST)
|
||||
IS_IPV6 = is_ipv6(HTTP_SERVER_HOST)
|
||||
HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6)
|
||||
|
||||
# IP address and port of the HTTP server started from this script.
|
||||
HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT)
|
||||
HTTP_SERVER_URL_STR = 'http://' + ':'.join(str(s) for s in HTTP_SERVER_ADDRESS) + "/"
|
||||
if IS_IPV6:
|
||||
HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
|
||||
else:
|
||||
HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
|
||||
|
||||
# Because we need to check the content of file.csv we can create this content and avoid reading csv
|
||||
CSV_DATA = "Hello, 1\nWorld, 2\nThis, 152\nis, 9283\ntesting, 2313213\ndata, 555\n"
|
||||
@ -48,6 +65,10 @@ ENDINGS = ['.gz', '.xz']
|
||||
SEND_ENCODING = True
|
||||
|
||||
def get_ch_answer(query):
|
||||
host = CLICKHOUSE_HOST
|
||||
if IS_IPV6:
|
||||
host = f'[{host}]'
|
||||
|
||||
url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP))
|
||||
return urllib.request.urlopen(url, data=query.encode()).read().decode()
|
||||
|
||||
@ -98,8 +119,14 @@ class HttpProcessor(SimpleHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
return
|
||||
|
||||
class HTTPServerV6(HTTPServer):
|
||||
address_family = socket.AF_INET6
|
||||
|
||||
def start_server(requests_amount):
|
||||
httpd = HTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor)
|
||||
if IS_IPV6:
|
||||
httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor)
|
||||
else:
|
||||
httpd = HTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor)
|
||||
|
||||
def real_func():
|
||||
for i in range(requests_amount):
|
||||
@ -127,7 +154,7 @@ def test_select(dict_name="", schema="word String, counter UInt32", requests=[],
|
||||
PRIMARY KEY word
|
||||
SOURCE(HTTP(url '{}' format 'CSV'))
|
||||
LAYOUT(complex_key_hashed())
|
||||
LIFETIME(0)'''.format(dict_name, schema, HTTP_SERVER_URL_STR+'/test.csv' + ADDING_ENDING))
|
||||
LIFETIME(0)'''.format(dict_name, schema, HTTP_SERVER_URL_STR + '/test.csv' + ADDING_ENDING))
|
||||
|
||||
COMPRESS_METHOD = requests[i]
|
||||
print(i, COMPRESS_METHOD, ADDING_ENDING, SEND_ENCODING)
|
||||
|
@ -0,0 +1,6 @@
|
||||
1 a
|
||||
2 b
|
||||
3 c
|
||||
4 d
|
||||
5 e
|
||||
6 f
|
20
tests/queries/0_stateless/02015_async_inserts_7.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1"
|
||||
|
||||
${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts"
|
||||
${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = Memory"
|
||||
|
||||
${CLICKHOUSE_CURL} -sS $url -d "INSERT INTO async_inserts VALUES (1, 'a') (2, 'b')" &
|
||||
${CLICKHOUSE_CURL} -sS $url -d "INSERT INTO async_inserts VALUES (3, 'c'), (4, 'd')" &
|
||||
${CLICKHOUSE_CURL} -sS $url -d "INSERT INTO async_inserts VALUES (5, 'e'), (6, 'f'), " &
|
||||
|
||||
wait
|
||||
|
||||
${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts ORDER BY id"
|
||||
|
||||
${CLICKHOUSE_CLIENT} -q "DROP TABLE async_inserts"
|
@ -0,0 +1 @@
|
||||
1
|
@ -0,0 +1,5 @@
|
||||
drop table if exists data_02021;
|
||||
create table data_02021 (key Int) engine=MergeTree() order by key;
|
||||
insert into data_02021 values (1);
|
||||
select count() from data_02021 prewhere 1 or ignore(key) where ignore(key)=0;
|
||||
drop table data_02021;
|
@ -0,0 +1,3 @@
|
||||
2
|
||||
1
|
||||
0
|
8
tests/queries/0_stateless/02100_now64_types_bug.sql
Normal file
@ -0,0 +1,8 @@
|
||||
SELECT x
|
||||
FROM
|
||||
(
|
||||
SELECT if((number % NULL) = -2147483648, NULL, if(toInt64(toInt64(now64(if((number % NULL) = -2147483648, NULL, if(toInt64(now64(toInt64(9223372036854775807, now64(plus(NULL, NULL))), plus(NULL, NULL))) = (number % NULL), nan, toFloat64(number))), toInt64(9223372036854775807, toInt64(9223372036854775807, now64(plus(NULL, NULL))), now64(plus(NULL, NULL))), plus(NULL, NULL))), now64(toInt64(9223372036854775807, toInt64(0, now64(plus(NULL, NULL))), now64(plus(NULL, NULL))), plus(NULL, NULL))) = (number % NULL), nan, toFloat64(number))) AS x
|
||||
FROM system.numbers
|
||||
LIMIT 3
|
||||
)
|
||||
ORDER BY x DESC NULLS LAST
|
@ -0,0 +1,22 @@
|
||||
<clickhouse>
|
||||
<profiles>
|
||||
<default></default>
|
||||
</profiles>
|
||||
<users>
|
||||
<default>
|
||||
<password></password>
|
||||
<networks>
|
||||
<ip>::/0</ip>
|
||||
</networks>
|
||||
<profile>default</profile>
|
||||
<quota>default</quota>
|
||||
</default>
|
||||
</users>
|
||||
<quotas>
|
||||
<default></default>
|
||||
</quotas>
|
||||
|
||||
<top_level_domains_lists>
|
||||
<public_suffix_list>02110_clickhouse_local_custom_tld.tld.dat</public_suffix_list>
|
||||
</top_level_domains_lists>
|
||||
</clickhouse>
|
@ -0,0 +1 @@
|
||||
biz.ss kernel.biz.ss
|
21
tests/queries/0_stateless/02110_clickhouse_local_custom_tld.sh
Executable file
@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
function clickhouse_local()
|
||||
{
|
||||
local opts=(
|
||||
--config "$CURDIR/$(basename "${BASH_SOURCE[0]}" .sh).config.xml"
|
||||
--top_level_domains_path "$CURDIR"
|
||||
)
|
||||
$CLICKHOUSE_LOCAL "${opts[@]}" "$@"
|
||||
}
|
||||
|
||||
# -- biz.ss is not in the default TLD list, hence:
|
||||
clickhouse_local -q "
|
||||
select
|
||||
cutToFirstSignificantSubdomain('foo.kernel.biz.ss'),
|
||||
cutToFirstSignificantSubdomainCustom('foo.kernel.biz.ss', 'public_suffix_list')
|
||||
" |& grep -v -e 'Processing configuration file'
|
@ -0,0 +1 @@
|
||||
biz.ss
|