Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-21 23:21:59 +00:00)

Merge branch 'kssenii-fix-write-through-cache-logical-error' into stress_s3

Commit: 5a0c5f0542
.gitmodules (vendored, 4 lines changed)
@@ -259,6 +259,10 @@
 [submodule "contrib/minizip-ng"]
 	path = contrib/minizip-ng
 	url = https://github.com/zlib-ng/minizip-ng
+[submodule "contrib/annoy"]
+	path = contrib/annoy
+	url = https://github.com/ClickHouse/annoy.git
+	branch = ClickHouse-master
 [submodule "contrib/qpl"]
 	path = contrib/qpl
 	url = https://github.com/intel/qpl.git
CMakeLists.txt
@@ -164,7 +164,6 @@ if (HAS_RESERVED_IDENTIFIER)
     add_compile_definitions (HAS_RESERVED_IDENTIFIER)
 endif ()

 # If turned `ON`, assumes the user has either the system GTest library or the bundled one.
 option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON)
 option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF)

@@ -200,8 +199,8 @@ endif ()
 option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.")

 if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
-    # Can be lld or ld-lld.
-    if (LINKER_NAME MATCHES "lld$")
+    # Can be lld or ld-lld or lld-13 or /path/to/lld.
+    if (LINKER_NAME MATCHES "lld")
         set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
         set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index")
         message (STATUS "Adding .gdb-index via --gdb-index linker option.")

@@ -246,7 +245,8 @@ else ()
 endif ()

 # Create BuildID when using lld. For other linkers it is created by default.
-if (LINKER_NAME MATCHES "lld$")
+# (NOTE: LINKER_NAME can be either path or name, and in different variants)
+if (LINKER_NAME MATCHES "lld")
     # SHA1 is not cryptographically secure but it is the best what lld is offering.
     set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1")
 endif ()

@@ -601,6 +601,7 @@ if (NATIVE_BUILD_TARGETS
         "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
         "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
         "-DENABLE_CCACHE=${ENABLE_CCACHE}"
+        "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=${ENABLE_CLICKHOUSE_SELF_EXTRACTING}"
         ${CMAKE_SOURCE_DIR}
         WORKING_DIRECTORY "${NATIVE_BUILD_DIR}"
         COMMAND_ECHO STDOUT)
@@ -7,12 +7,8 @@
 # How to install Ninja on Ubuntu:
 # sudo apt-get install ninja-build

-# CLion does not support Ninja
-# You can add your vote on CLion task tracker:
-# https://youtrack.jetbrains.com/issue/CPP-2659
-# https://youtrack.jetbrains.com/issue/CPP-870

-if (NOT DEFINED ENV{CLION_IDE} AND NOT DEFINED ENV{XCODE_IDE})
+if (NOT DEFINED ENV{XCODE_IDE})
     find_program(NINJA_PATH ninja)
     if (NINJA_PATH)
         set(CMAKE_GENERATOR "Ninja" CACHE INTERNAL "")
contrib/CMakeLists.txt (vendored, 2 lines changed)
@@ -159,6 +159,8 @@ add_contrib (s2geometry-cmake s2geometry)
 add_contrib (c-ares-cmake c-ares)
 add_contrib (qpl-cmake qpl)

+add_contrib(annoy-cmake annoy)
+
 # Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
 # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
 # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually,
contrib/annoy (vendored submodule, new)
@@ -0,0 +1 @@
+Subproject commit 9d8a603a4cd252448589e84c9846f94368d5a289
contrib/annoy-cmake/CMakeLists.txt (new file, 16 lines)
@@ -0,0 +1,16 @@
option(ENABLE_ANNOY "Enable Annoy index support" ${ENABLE_LIBRARIES})

if ((NOT ENABLE_ANNOY) OR (SANITIZE STREQUAL "undefined"))
    message (STATUS "Not using annoy")
    return()
endif()

set(ANNOY_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/annoy")
set(ANNOY_SOURCE_DIR "${ANNOY_PROJECT_DIR}/src")

add_library(_annoy INTERFACE)
target_include_directories(_annoy SYSTEM INTERFACE ${ANNOY_SOURCE_DIR})

add_library(ch_contrib::annoy ALIAS _annoy)
target_compile_definitions(_annoy INTERFACE ENABLE_ANNOY)
target_compile_definitions(_annoy INTERFACE ANNOYLIB_MULTITHREADED_BUILD)
@@ -78,6 +78,7 @@ RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
     && apt-get update \
     && apt-get install \
         clang-15 \
         llvm-15 \
         clang-tidy-15 \
         --yes --no-install-recommends \
     && apt-get clean
@@ -26,7 +26,7 @@ ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

 # set non-empty deb_location_url url to create a docker image
 # from debs created by CI build, for example:
-# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852
+# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://..." -t ...
 ARG deb_location_url=""

 # set non-empty single_binary_location_url to create docker image
@@ -12,7 +12,7 @@ stage=${stage:-}
 script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 echo "$script_dir"
 repo_dir=ch
-BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-14_debug_none_bundled_unsplitted_disable_False_binary"}
+BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-14_debug_none_unsplitted_disable_False_binary"}
 BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}

 function clone
@@ -2,7 +2,7 @@
 set -euo pipefail

-CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-14_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}
+CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-14_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
 CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}
@@ -88,13 +88,15 @@ sleep 5
 function run_tests()
 {
     set -x
-    # We can have several additional options so we path them as array because it's
-    # more idiologically correct.
+    # We can have several additional options so we pass them as array because it is more ideologically correct.
     read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"

+    HIGH_LEVEL_COVERAGE=YES
+
     # Use random order in flaky check
     if [ "$NUM_TRIES" -gt "1" ]; then
         ADDITIONAL_OPTIONS+=('--order=random')
+        HIGH_LEVEL_COVERAGE=NO
     fi

     if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
@@ -117,12 +119,17 @@ function run_tests()
         ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_NUM")
         ADDITIONAL_OPTIONS+=('--run-by-hash-total')
         ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_TOTAL")
+        HIGH_LEVEL_COVERAGE=NO
     fi

     if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then
         ADDITIONAL_OPTIONS+=('--db-engine=Ordinary')
     fi

+    if [[ "${HIGH_LEVEL_COVERAGE}" = "YES" ]]; then
+        ADDITIONAL_OPTIONS+=('--report-coverage')
+    fi
+
     set +e
     clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
         --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
@@ -1,4 +1,5 @@
 #!/bin/bash
+# shellcheck disable=SC2024

 set -e -x -a -u

@@ -9,7 +10,7 @@ cd hadoop-3.3.1
 export JAVA_HOME=/usr
 mkdir -p target/test/data
 chown clickhouse ./target/test/data
-sudo -E -u clickhouse bin/mapred minicluster -format -nomr -nnport 12222 &
+sudo -E -u clickhouse bin/mapred minicluster -format -nomr -nnport 12222 >> /test_output/garbage.log 2>&1 &

 while ! nc -z localhost 12222; do
     sleep 1
@@ -105,12 +105,16 @@ EOL

 function stop()
 {
+    local pid
+    # Preserve the pid, since the server can hung after the PID will be deleted.
+    pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
+
     clickhouse stop --do-not-kill && return
     # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces.
     kill -TERM "$(pidof gdb)" ||:
     sleep 5
     echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
-    gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
+    gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
     clickhouse stop --force
 }
@@ -334,7 +338,6 @@ else
     rm -rf /var/lib/clickhouse/*

     # Make BC check more funny by forcing Ordinary engine for system database
     # New version will try to convert it to Atomic on startup
     mkdir /var/lib/clickhouse/metadata
     echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql
@@ -344,16 +347,13 @@ else
     # Start server from previous release
     configure

-    # Avoid "Setting allow_deprecated_database_ordinary is neither a builtin setting..."
-    rm -f /etc/clickhouse-server/users.d/database_ordinary.xml ||:
-    # Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..."
-    rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||:
-
     # Remove s3 related configs to avoid "there is no disk type `cache`"
     rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||:
     rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||:

+    # Disable aggressive cleanup of tmp dirs (it worked incorrectly before 22.8)
+    rm -f /etc/clickhouse-server/config.d/merge_tree_old_dirs_cleanup.xml ||:
+
     start

     clickhouse-client --query="SELECT 'Server version: ', version()"
@@ -476,6 +476,13 @@ else
     [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
 fi

+dmesg -T > /test_output/dmesg.log
+
+# OOM in dmesg -- those are real
+grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \
+    && echo -e 'OOM in dmesg\tFAIL' >> /test_output/test_results.tsv \
+    || echo -e 'No OOM in dmesg\tOK' >> /test_output/test_results.tsv
+
 tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
 mv /var/log/clickhouse-server/stderr.log /test_output/
@@ -497,5 +504,3 @@ for core in core.*; do
     pigz $core
     mv $core.gz /test_output/
 done
-
-dmesg -T > /test_output/dmesg.log
@@ -14,8 +14,6 @@ do
         || curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION.tgz"
 done

-exit 0
-
 tar -xzvf "clickhouse-common-static-$LATEST_VERSION-${ARCH}.tgz" \
     || tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz"
 sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh"
@@ -26,7 +24,7 @@ sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh"

 tar -xzvf "clickhouse-server-$LATEST_VERSION-${ARCH}.tgz" \
     || tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz"
-sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh"
+sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" configure
 sudo /etc/init.d/clickhouse-server start

 tar -xzvf "clickhouse-client-$LATEST_VERSION-${ARCH}.tgz" \
docs/changelogs/v22.3.12.19-lts.md (new file, 25 lines)
@@ -0,0 +1,25 @@
---
sidebar_position: 1
sidebar_label: 2022
---

# 2022 Changelog

### ClickHouse release v22.3.12.19-lts (4a08f8a073b) FIXME as compared to v22.3.11.12-lts (137c5f72657)

#### Build/Testing/Packaging Improvement
* Backported in [#40695](https://github.com/ClickHouse/ClickHouse/issues/40695): Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Backported in [#40160](https://github.com/ClickHouse/ClickHouse/issues/40160): fix HashMethodOneNumber get wrong key value when column is const. [#40020](https://github.com/ClickHouse/ClickHouse/pull/40020) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#40122](https://github.com/ClickHouse/ClickHouse/issues/40122): Fix bug in collectFilesToSkip() by adding correct file extension(.idx or idx2) for indexes to be recalculated, avoid wrong hard links. Fixed [#39896](https://github.com/ClickHouse/ClickHouse/issues/39896). [#40095](https://github.com/ClickHouse/ClickHouse/pull/40095) ([Jianmei Zhang](https://github.com/zhangjmruc)).
* Backported in [#40207](https://github.com/ClickHouse/ClickHouse/issues/40207): Fix insufficient argument check for encryption functions (found by query fuzzer). This closes [#39987](https://github.com/ClickHouse/ClickHouse/issues/39987). [#40194](https://github.com/ClickHouse/ClickHouse/pull/40194) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#40270](https://github.com/ClickHouse/ClickHouse/issues/40270): Fix possible segfault in CapnProto input format. This bug was found and send through ClickHouse bug-bounty [program](https://github.com/ClickHouse/ClickHouse/issues/38986) by *kiojj*. [#40241](https://github.com/ClickHouse/ClickHouse/pull/40241) ([Kruglov Pavel](https://github.com/Avogar)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* fix heap buffer overflow by limiting http chunk size [#40292](https://github.com/ClickHouse/ClickHouse/pull/40292) ([Sema Checherinda](https://github.com/CheSema)).
* Reduce changelog verbosity in CI [#40360](https://github.com/ClickHouse/ClickHouse/pull/40360) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backport the upstream clickhouse_helper.py [#40490](https://github.com/ClickHouse/ClickHouse/pull/40490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
docs/changelogs/v22.6.7.7-stable.md (new file, 17 lines)
@@ -0,0 +1,17 @@
---
sidebar_position: 1
sidebar_label: 2022
---

# 2022 Changelog

### ClickHouse release v22.6.7.7-stable (8eae2af3b9a) FIXME as compared to v22.6.6.16-stable (d2a33ebc822)

#### Build/Testing/Packaging Improvement
* Backported in [#40692](https://github.com/ClickHouse/ClickHouse/issues/40692): Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Backported in [#40531](https://github.com/ClickHouse/ClickHouse/issues/40531): Proxy resolver stop on first successful request to endpoint. [#40353](https://github.com/ClickHouse/ClickHouse/pull/40353) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#40623](https://github.com/ClickHouse/ClickHouse/issues/40623): Fix potential dataloss due to a bug in AWS SDK (https://github.com/aws/aws-sdk-cpp/issues/658). Bug can be triggered only when clickhouse is used over S3. [#40506](https://github.com/ClickHouse/ClickHouse/pull/40506) ([alesapin](https://github.com/alesapin)).
docs/changelogs/v22.7.5.13-stable.md (new file, 23 lines)
@@ -0,0 +1,23 @@
---
sidebar_position: 1
sidebar_label: 2022
---

# 2022 Changelog

### ClickHouse release v22.7.5.13-stable (6f48d2d1f59) FIXME as compared to v22.7.4.16-stable (0b9272f8fdc)

#### Build/Testing/Packaging Improvement
* Backported in [#40693](https://github.com/ClickHouse/ClickHouse/issues/40693): Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Backported in [#40542](https://github.com/ClickHouse/ClickHouse/issues/40542): Fix potential deadlock in WriteBufferFromS3 during task scheduling failure. [#40070](https://github.com/ClickHouse/ClickHouse/pull/40070) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#40450](https://github.com/ClickHouse/ClickHouse/issues/40450): Fix rare bug with column TTL for MergeTree engines family: In case of repeated vertical merge the error `Cannot unlink file ColumnName.bin ... No such file or directory.` could happen. [#40346](https://github.com/ClickHouse/ClickHouse/pull/40346) ([alesapin](https://github.com/alesapin)).
* Backported in [#40532](https://github.com/ClickHouse/ClickHouse/issues/40532): Proxy resolver stop on first successful request to endpoint. [#40353](https://github.com/ClickHouse/ClickHouse/pull/40353) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#40624](https://github.com/ClickHouse/ClickHouse/issues/40624): Fix potential dataloss due to a bug in AWS SDK (https://github.com/aws/aws-sdk-cpp/issues/658). Bug can be triggered only when clickhouse is used over S3. [#40506](https://github.com/ClickHouse/ClickHouse/pull/40506) ([alesapin](https://github.com/alesapin)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* DNSResolver remove AI_V4MAPPED, AI_ALL hints [#40502](https://github.com/ClickHouse/ClickHouse/pull/40502) ([Maksim Kita](https://github.com/kitaisreal)).
docs/changelogs/v22.8.3.13-lts.md (new file, 25 lines)
@@ -0,0 +1,25 @@
---
sidebar_position: 1
sidebar_label: 2022
---

# 2022 Changelog

### ClickHouse release v22.8.3.13-lts (6a15b73faea) FIXME as compared to v22.8.2.11-lts (b4ed6d744ff)

#### Improvement
* Backported in [#40550](https://github.com/ClickHouse/ClickHouse/issues/40550): Improve schema inference cache, respect format settings that can change the schema. [#40414](https://github.com/ClickHouse/ClickHouse/pull/40414) ([Kruglov Pavel](https://github.com/Avogar)).

#### Build/Testing/Packaging Improvement
* Backported in [#40694](https://github.com/ClickHouse/ClickHouse/issues/40694): Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Backported in [#40451](https://github.com/ClickHouse/ClickHouse/issues/40451): Fix rare bug with column TTL for MergeTree engines family: In case of repeated vertical merge the error `Cannot unlink file ColumnName.bin ... No such file or directory.` could happen. [#40346](https://github.com/ClickHouse/ClickHouse/pull/40346) ([alesapin](https://github.com/alesapin)).
* Backported in [#40533](https://github.com/ClickHouse/ClickHouse/issues/40533): Proxy resolver stop on first successful request to endpoint. [#40353](https://github.com/ClickHouse/ClickHouse/pull/40353) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#40625](https://github.com/ClickHouse/ClickHouse/issues/40625): Fix potential dataloss due to a bug in AWS SDK (https://github.com/aws/aws-sdk-cpp/issues/658). Bug can be triggered only when clickhouse is used over S3. [#40506](https://github.com/ClickHouse/ClickHouse/pull/40506) ([alesapin](https://github.com/alesapin)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* DNSResolver remove AI_V4MAPPED, AI_ALL hints [#40502](https://github.com/ClickHouse/ClickHouse/pull/40502) ([Maksim Kita](https://github.com/kitaisreal)).
docs/en/development/adding_test_queries.md
@@ -1,10 +1,11 @@
---
+slug: /en/development/adding_test_queries
sidebar_label: Adding Test Queries
sidebar_position: 63
title: How to add test queries to ClickHouse CI
description: Instructions on how to add a test case to ClickHouse continuous integration
---

# How to add test queries to ClickHouse CI

ClickHouse has hundreds (or even thousands) of features. Every commit gets checked by a complex set of tests containing many thousands of test cases.
docs/en/development/architecture.md
@@ -1,4 +1,5 @@
---
+slug: /en/development/architecture
sidebar_label: Architecture Overview
sidebar_position: 62
---

docs/en/development/browse-code.md
@@ -1,4 +1,5 @@
---
+slug: /en/development/browse-code
sidebar_label: Source Code Browser
sidebar_position: 72
description: Various ways to browse and edit the source code

docs/en/development/build-cross-arm.md
@@ -1,10 +1,10 @@
---
slug: /en/development/build-cross-arm
sidebar_position: 67
title: How to Build ClickHouse on Linux for AARCH64 (ARM64) Architecture
sidebar_label: Build on Linux for AARCH64 (ARM64)
---

# How to Build ClickHouse on Linux for AARCH64 (ARM64) Architecture

If you use an AArch64 machine and want to build ClickHouse for AArch64, build as usual.

If you use an x86_64 machine and want to cross-compile for AArch64, add the following flag to `cmake`: `-DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-aarch64.cmake`
docs/en/development/build-cross-osx.md
@@ -1,9 +1,10 @@
---
slug: /en/development/build-cross-osx
sidebar_position: 66
title: How to Build ClickHouse on Linux for Mac OS X
sidebar_label: Build on Linux for Mac OS X
---

# How to Build ClickHouse on Linux for Mac OS X

This is for the case when you have a Linux machine and want to use it to build a `clickhouse` binary that will run on OS X.
This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](../development/build-osx.md).

docs/en/development/build-cross-riscv.md
@@ -1,10 +1,10 @@
---
slug: /en/development/build-cross-riscv
sidebar_position: 68
title: How to Build ClickHouse on Linux for RISC-V 64 Architecture
sidebar_label: Build on Linux for RISC-V 64
---

# How to Build ClickHouse on Linux for RISC-V 64 Architecture

As of writing (11.11.2021), building for RISC-V is considered highly experimental. Not all features can be enabled.

This is for the case when you have a Linux machine and want to use it to build a `clickhouse` binary that will run on another Linux machine with RISC-V 64 CPU architecture. This is intended for continuous integration checks that run on Linux servers.
|
||||
---
|
||||
slug: /en/development/build-osx
|
||||
sidebar_position: 65
|
||||
sidebar_label: Build on Mac OS X
|
||||
title: How to Build ClickHouse on Mac OS X
|
||||
description: How to build ClickHouse on Mac OS X
|
||||
---
|
||||
|
||||
# How to Build ClickHouse on Mac OS X
|
||||
|
||||
:::info You don't have to build ClickHouse yourself!
|
||||
You can install pre-built ClickHouse as described in [Quick Start](https://clickhouse.com/#quick-start). Follow **macOS (Intel)** or **macOS (Apple silicon)** installation instructions.
|
||||
:::
|
||||
|
@ -1,10 +1,11 @@
|
||||
---
|
||||
slug: /en/development/build
|
||||
sidebar_position: 64
|
||||
sidebar_label: Build on Linux
|
||||
title: How to Build ClickHouse on Linux
|
||||
description: How to build ClickHouse on Linux
|
||||
---
|
||||
|
||||
# How to Build ClickHouse on Linux
|
||||
|
||||
Supported platforms:
|
||||
|
||||
|
docs/en/development/continuous-integration.md
@@ -1,11 +1,11 @@
---
slug: /en/development/continuous-integration
sidebar_position: 62
sidebar_label: Continuous Integration Checks
title: Continuous Integration Checks
description: When you submit a pull request, some automated checks are run for your code by the ClickHouse continuous integration (CI) system
---

# Continuous Integration Checks

When you submit a pull request, some automated checks are run for your code by
the ClickHouse [continuous integration (CI) system](tests.md#test-automation).
This happens after a repository maintainer (someone from the ClickHouse team) has
@@ -54,7 +54,7 @@ the documentation is wrong. Go to the check report and look for `ERROR` and `WARN`
 Check that the description of your pull request conforms to the template
 [PULL_REQUEST_TEMPLATE.md](https://github.com/ClickHouse/ClickHouse/blob/master/.github/PULL_REQUEST_TEMPLATE.md).
 You have to specify a changelog category for your change (e.g., Bug Fix), and
-write a user-readable message describing the change for [CHANGELOG.md](../whats-new/changelog/)
+write a user-readable message describing the change for [CHANGELOG.md](../whats-new/changelog/index.md)


 ## Push To DockerHub
docs/en/development/contrib.md
@@ -1,4 +1,5 @@
---
+slug: /en/development/contrib
sidebar_position: 71
sidebar_label: Third-Party Libraries
description: A list of third-party libraries used

docs/en/development/developer-instruction.md
@@ -1,4 +1,5 @@
---
+slug: /en/development/developer-instruction
sidebar_position: 61
sidebar_label: Getting Started
description: Prerequisites and an overview of how to build ClickHouse
@@ -285,9 +286,4 @@ If you are not interested in functionality provided by third-party libraries, you
    -DENABLE_LIBRARIES=0 -DENABLE_EMBEDDED_COMPILER=0
```

Compressing the binary at the end of the build may take a while; disable the self-extraction feature via
```
    -DENABLE_CLICKHOUSE_SELF_EXTRACTING=0
```

In case of problems with any of the development options, you are on your own!
docs/en/development/index.md (deleted)
@@ -1,8 +0,0 @@
----
-sidebar_label: Development
-sidebar_position: 58
----
-
-# ClickHouse Development
-
-[Original article](https://clickhouse.com/docs/en/development/) <!--hide-->
docs/en/development/integrating_rust_libraries.md
@@ -1,3 +1,6 @@
+---
+slug: /en/development/integrating_rust_libraries
+---
 # Integrating Rust libraries

 Rust library integration will be described based on BLAKE3 hash-function integration.

docs/en/development/style.md
@@ -1,4 +1,5 @@
---
+slug: /en/development/style
sidebar_position: 69
sidebar_label: C++ Guide
description: A list of recommendations regarding coding style, naming convention, formatting and more

docs/en/development/tests.md
@@ -1,11 +1,11 @@
---
slug: /en/development/tests
sidebar_position: 70
sidebar_label: Testing
title: ClickHouse Testing
description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way.
---

# ClickHouse Testing

## Functional Tests

Functional tests are the simplest and most convenient to use. Most ClickHouse features can be tested with functional tests, and they are mandatory to use for every change in ClickHouse code that can be tested that way.
docs/en/engines/database-engines/atomic.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/atomic
sidebar_label: Atomic
sidebar_position: 10
---

docs/en/engines/database-engines/index.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/
toc_folder_title: Database Engines
toc_priority: 27
toc_title: Introduction

docs/en/engines/database-engines/lazy.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/lazy
sidebar_label: Lazy
sidebar_position: 20
---

docs/en/engines/database-engines/materialized-mysql.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/materialized-mysql
sidebar_label: MaterializedMySQL
sidebar_position: 70
---

docs/en/engines/database-engines/materialized-postgresql.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/materialized-postgresql
sidebar_label: MaterializedPostgreSQL
sidebar_position: 60
---
docs/en/engines/database-engines/mysql.md
@@ -1,9 +1,10 @@
---
+slug: /en/engines/database-engines/mysql
sidebar_position: 50
sidebar_label: MySQL
---

# MySQL

Allows you to connect to databases on a remote MySQL server and perform `INSERT` and `SELECT` queries to exchange data between ClickHouse and MySQL.
@@ -98,7 +99,7 @@ mysql> select * from mysql_table;
 Database in ClickHouse, exchanging data with the MySQL server:

 ``` sql
-CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'test', 'my_user', 'user_password')
+CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'test', 'my_user', 'user_password') SETTINGS read_write_timeout=10000, connect_timeout=100;
 ```

 ``` sql
docs/en/engines/database-engines/postgresql.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/postgresql
sidebar_position: 40
sidebar_label: PostgreSQL
---

docs/en/engines/database-engines/replicated.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/replicated
sidebar_position: 30
sidebar_label: Replicated
---

docs/en/engines/database-engines/sqlite.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/sqlite
sidebar_position: 55
sidebar_label: SQLite
---

docs/en/engines/table-engines/index.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/
toc_folder_title: Table Engines
toc_priority: 26
toc_title: Introduction
docs/en/engines/table-engines/integrations/ExternalDistributed.md
@@ -1,10 +1,10 @@
---
slug: /en/engines/table-engines/integrations/ExternalDistributed
sidebar_position: 12
sidebar_label: ExternalDistributed
title: ExternalDistributed
---

# ExternalDistributed

The `ExternalDistributed` engine allows you to run `SELECT` queries on data stored on remote MySQL or PostgreSQL servers. It accepts the [MySQL](../../../engines/table-engines/integrations/mysql.md) or [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md) engine as an argument, so sharding is possible.
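A minimal sketch of what such a table can look like (the hosts, database, table and credentials here are placeholders; the exact argument list is defined in the next section):

``` sql
CREATE TABLE distributed_mysql_data
(
    id UInt32,
    name String
)
ENGINE = ExternalDistributed('MySQL', 'mysql-host-1:3306,mysql-host-2:3306', 'database', 'table', 'user', 'password');
```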
## Creating a Table {#creating-a-table}
docs/en/engines/table-engines/integrations/embedded-rocksdb.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/embedded-rocksdb
sidebar_position: 9
sidebar_label: EmbeddedRocksDB
---

docs/en/engines/table-engines/integrations/hdfs.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/hdfs
sidebar_position: 6
sidebar_label: HDFS
---

docs/en/engines/table-engines/integrations/hive.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/hive
sidebar_position: 4
sidebar_label: Hive
---

docs/en/engines/table-engines/integrations/index.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/
sidebar_position: 40
sidebar_label: Integrations
---

docs/en/engines/table-engines/integrations/jdbc.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/jdbc
sidebar_position: 3
sidebar_label: JDBC
---

docs/en/engines/table-engines/integrations/kafka.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/kafka
sidebar_position: 8
sidebar_label: Kafka
---
docs/en/engines/table-engines/integrations/materialized-postgresql.md
@@ -1,10 +1,10 @@
---
slug: /en/engines/table-engines/integrations/materialized-postgresql
sidebar_position: 12
sidebar_label: MaterializedPostgreSQL
title: MaterializedPostgreSQL
---

# MaterializedPostgreSQL

Creates a ClickHouse table with an initial data dump of a PostgreSQL table and starts the replication process, i.e. it executes a background job to apply new changes as they happen on the PostgreSQL table in the remote PostgreSQL database.

If more than one table is required, it is highly recommended to use the [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md) database engine instead of the table engine, and to use the `materialized_postgresql_tables_list` setting, which specifies the tables to be replicated (it will also be possible to add a database `schema`). It is much better in terms of CPU, and results in fewer connections and fewer replication slots inside the remote PostgreSQL database.
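A minimal sketch of that recommended database-level setup (the host, database names and credentials are placeholders):

``` sql
CREATE DATABASE postgres_db
ENGINE = MaterializedPostgreSQL('postgres-host:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3';
```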
docs/en/engines/table-engines/integrations/mongodb.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/mongodb
sidebar_position: 5
sidebar_label: MongoDB
---

docs/en/engines/table-engines/integrations/mysql.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/mysql
sidebar_position: 4
sidebar_label: MySQL
---

docs/en/engines/table-engines/integrations/nats.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/nats
sidebar_position: 14
sidebar_label: NATS
---

docs/en/engines/table-engines/integrations/odbc.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/odbc
sidebar_position: 2
sidebar_label: ODBC
---

docs/en/engines/table-engines/integrations/postgresql.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/postgresql
sidebar_position: 11
sidebar_label: PostgreSQL
---

docs/en/engines/table-engines/integrations/rabbitmq.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/rabbitmq
sidebar_position: 10
sidebar_label: RabbitMQ
---

docs/en/engines/table-engines/integrations/s3.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/s3
sidebar_position: 7
sidebar_label: S3
---

docs/en/engines/table-engines/integrations/sqlite.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/sqlite
sidebar_position: 7
sidebar_label: SQLite
---

docs/en/engines/table-engines/log-family/index.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/log-family/
sidebar_position: 20
sidebar_label: Log Family
---

docs/en/engines/table-engines/log-family/log.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/log-family/log
toc_priority: 33
toc_title: Log
---

docs/en/engines/table-engines/log-family/stripelog.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/log-family/stripelog
toc_priority: 32
toc_title: StripeLog
---

docs/en/engines/table-engines/log-family/tinylog.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/log-family/tinylog
toc_priority: 34
toc_title: TinyLog
---

docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/aggregatingmergetree
sidebar_position: 60
sidebar_label: AggregatingMergeTree
---
docs/en/engines/table-engines/mergetree-family/annindexes.md (new file, 125 lines)
@@ -0,0 +1,125 @@
# Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex}

The main task of these indexes is to quickly find the nearest neighbors for multidimensional data. An example of such a problem is finding similar pictures (or texts) for a given picture (or text). The problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning), which can be created from data using [UDF](../../../sql-reference/functions/index.md#executable-user-defined-functions).

The following query finds the closest neighbors in N-dimensional space using the L2 (Euclidean) distance:
``` sql
SELECT *
FROM table_name
WHERE L2Distance(Column, Point) < MaxDistance
LIMIT N
```
But it takes some time to execute, because of the long calculation of the distance between the target vector and all other vectors. This is where ANN indexes can help. They store a compact approximation of the search space (e.g. using clustering, search trees, etc.) and are able to compute approximate neighbors quickly.

## Indexes Structure

Approximate Nearest Neighbor Search Indexes (`ANNIndexes`) are similar to skip indexes. They are constructed over granules and determine which of them should be skipped. Compared to skip indexes, ANN indexes use their results not only to skip some group of granules, but also to select particular granules from a set of granules.

`ANNIndexes` are designed to speed up two types of queries:

- ###### Type 1: Where
  ``` sql
  SELECT *
  FROM table_name
  WHERE DistanceFunction(Column, Point) < MaxDistance
  LIMIT N
  ```
- ###### Type 2: Order by
  ``` sql
  SELECT *
  FROM table_name [WHERE ...]
  ORDER BY DistanceFunction(Column, Point)
  LIMIT N
  ```

In these queries, `DistanceFunction` is selected from the [distance functions](../../../sql-reference/functions/distance-functions). `Point` is a known vector (something like `(0.1, 0.1, ... )`). To avoid writing large vectors, use [client parameters](../../../interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters). `MaxDistance` is a float value that bounds the neighborhood.
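For instance, a large target vector can be passed as a client-side parameter instead of being written inline (a sketch; the parameter name `target` is arbitrary):

``` sql
SELECT *
FROM table_name
ORDER BY L2Distance(Column, {target:Array(Float32)})
LIMIT 10
```

The value is then supplied on the command line, e.g. `clickhouse-client --param_target='[0.1, 0.1, 0.1]'`.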
!!! note "Note"
    An ANN index cannot speed up a query that combines both types (`WHERE` + `ORDER BY`); only one of them can be used. All queries must have a `LIMIT`, as the algorithms are used to find a specific number of nearest neighbors.

!!! note "Note"
    Indexes are applied only to queries with a `LIMIT` smaller than the `max_limit_for_ann_queries` setting. This helps to avoid memory overflows in queries with a large limit. The `max_limit_for_ann_queries` setting can be changed if you know you can provide enough memory. The default value is `1000000`.

Both types of queries are handled the same way. The indexes get `n` neighbors (where `n` is taken from the `LIMIT` clause) and work with them. In an `ORDER BY` query they remember the numbers of all granules that contain at least one of the neighbors. In a `WHERE` query they remember only those granules that satisfy the condition.

## Create table with ANNIndex

```sql
CREATE TABLE t
(
    `id` Int64,
    `number` Tuple(Float32, Float32, Float32),
    INDEX x number TYPE annoy GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```

```sql
CREATE TABLE t
(
    `id` Int64,
    `number` Array(Float32),
    INDEX x number TYPE annoy GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```

With a greater `GRANULARITY`, indexes remember the data structure better. `GRANULARITY` indicates how many granules are used to construct the index. The more data is provided for the index, the more of it can be handled by one index, and the higher the chance that, with the right hyperparameters, the index remembers the data structure better. But some indexes can't be built if they don't have enough data, in which case such a granule always participates in the query. For more information, see the descriptions of the indexes.

As the indexes are built only during insertions into the table, `INSERT` and `OPTIMIZE` queries are slower than for an ordinary table. At this stage, indexes remember all the information about the given data. ANNIndexes should be used if you have immutable or rarely changed data and many read requests.

You can create a table with an index that uses a certain algorithm. Currently, only indexes based on the following algorithms are supported:

# Index list
- [Annoy](../../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)

# Annoy {#annoy}
The implementation of the algorithm was taken from [this repository](https://github.com/spotify/annoy).

Short description of the algorithm:
The algorithm recursively divides all space in half with random linear surfaces (lines in 2D, planes in 3D, etc.). Thus it builds a tree of polyhedra and the points they contain. Repeating the operation several times for greater accuracy, it creates a forest.
To find the K nearest neighbours, it goes down through the trees and fills a buffer of the closest points using a priority queue of polyhedra. Then it sorts the buffer and returns the nearest K points.

__Examples__:
```sql
CREATE TABLE t
(
    id Int64,
    number Tuple(Float32, Float32, Float32),
    INDEX x number TYPE annoy(T) GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```

```sql
CREATE TABLE t
(
    id Int64,
    number Array(Float32),
    INDEX x number TYPE annoy(T) GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```
!!! note "Note"
    A table with an array field will work faster, but all arrays **must** have the same length. Use [CONSTRAINT](../../../sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(number) = 256`.
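A sketch of how such a constraint fits into the table definition above (the dimension 256 is arbitrary, and `T` and `N` are placeholders as in the preceding examples):

```sql
CREATE TABLE t
(
    id Int64,
    number Array(Float32),
    CONSTRAINT constraint_name_1 CHECK length(number) = 256,
    INDEX x number TYPE annoy(T) GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```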
Parameter `T` is the number of trees the algorithm will create. The bigger it is, the slower it works (approximately linearly, in both `CREATE` and `SELECT` requests), but the better the accuracy you get (adjusted for randomness).

Annoy supports only `L2Distance`.

In the `SELECT`, the settings (`ann_index_select_query_params`) let you specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes, which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time tradeoff between accuracy and speed.

__Example__:
``` sql
SELECT *
FROM table_name [WHERE ...]
ORDER BY L2Distance(Column, Point)
LIMIT N
SETTINGS ann_index_select_query_params=`search_k=100`
```
docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/collapsingmergetree
sidebar_position: 70
sidebar_label: CollapsingMergeTree
---

docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/custom-partitioning-key
sidebar_position: 30
sidebar_label: Custom Partitioning Key
---

docs/en/engines/table-engines/mergetree-family/graphitemergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/graphitemergetree
sidebar_position: 90
sidebar_label: GraphiteMergeTree
---

docs/en/engines/table-engines/mergetree-family/index.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/
sidebar_position: 10
sidebar_label: MergeTree Family
---

docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/mergetree
sidebar_position: 11
sidebar_label: MergeTree
---
@@ -480,6 +481,10 @@ For example:
 - `NOT startsWith(s, 'test')`
 :::

+## Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex}
+
+In addition to skip indices, there are also [Approximate Nearest Neighbor Search Indexes](../../../engines/table-engines/mergetree-family/annindexes.md).
+
 ## Projections {#projections}
 Projections are like [materialized views](../../../sql-reference/statements/create/view.md#materialized) but defined at the part level. They provide consistency guarantees along with automatic usage in queries.
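As a quick illustrative sketch of the concept (the table and column names here are hypothetical), a projection is declared inside the table definition and maintained per part:

``` sql
CREATE TABLE events
(
    user_id UInt64,
    event_date Date,
    cost Float64,
    PROJECTION cost_by_user
    (
        SELECT user_id, sum(cost)
        GROUP BY user_id
    )
)
ENGINE = MergeTree
ORDER BY (event_date, user_id);
```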
docs/en/engines/table-engines/mergetree-family/replacingmergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/replacingmergetree
sidebar_position: 40
sidebar_label: ReplacingMergeTree
---

docs/en/engines/table-engines/mergetree-family/replication.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/replication
sidebar_position: 20
sidebar_label: Data Replication
---

docs/en/engines/table-engines/mergetree-family/summingmergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/summingmergetree
sidebar_position: 50
sidebar_label: SummingMergeTree
---

docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/versionedcollapsingmergetree
sidebar_position: 80
sidebar_label: VersionedCollapsingMergeTree
---

docs/en/engines/table-engines/special/buffer.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/buffer
sidebar_position: 120
sidebar_label: Buffer
---

docs/en/engines/table-engines/special/dictionary.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/dictionary
sidebar_position: 20
sidebar_label: Dictionary
---

docs/en/engines/table-engines/special/distributed.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/distributed
sidebar_position: 10
sidebar_label: Distributed
---

docs/en/engines/table-engines/special/external-data.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/external-data
sidebar_position: 130
sidebar_label: External Data
---

docs/en/engines/table-engines/special/file.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/file
sidebar_position: 40
sidebar_label: File
---

docs/en/engines/table-engines/special/generate.md
@@ -1,10 +1,10 @@
---
slug: /en/engines/table-engines/special/generate
sidebar_position: 140
sidebar_label: GenerateRandom
title: "GenerateRandom Table Engine"
---

# GenerateRandom Table Engine

The GenerateRandom table engine produces random data for a given table schema.

Usage examples:

docs/en/engines/table-engines/special/index.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/
sidebar_position: 50
sidebar_label: Special
---

docs/en/engines/table-engines/special/join.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/join
sidebar_position: 70
sidebar_label: Join
---

docs/en/engines/table-engines/special/materializedview.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/materializedview
sidebar_position: 100
sidebar_label: MaterializedView
---

docs/en/engines/table-engines/special/memory.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/memory
sidebar_position: 110
sidebar_label: Memory
---

docs/en/engines/table-engines/special/merge.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/merge
sidebar_position: 30
sidebar_label: Merge
---

docs/en/engines/table-engines/special/null.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/null
sidebar_position: 50
sidebar_label: 'Null'
---

docs/en/engines/table-engines/special/set.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/set
sidebar_position: 60
sidebar_label: Set
---
docs/en/engines/table-engines/special/url.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/url
sidebar_position: 80
sidebar_label: URL
---
@@ -13,6 +14,8 @@ Syntax: `URL(URL [,Format] [,CompressionMethod])`

- The `Format` must be one that ClickHouse can use in `SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see [Formats](../../../interfaces/formats.md#formats).

+    If this argument is not specified, ClickHouse detects the format automatically from the suffix of the `URL` parameter. If the suffix of the `URL` parameter does not match any supported format, it fails to create the table. For example, for the engine expression `URL('http://localhost/test.json')`, the `JSON` format is applied.
+
- `CompressionMethod` indicates whether the HTTP body should be compressed. If compression is enabled, the HTTP packets sent by the URL engine contain the 'Content-Encoding' header to indicate which compression method is used.

    To enable compression, please first make sure the remote HTTP endpoint indicated by the `URL` parameter supports the corresponding compression algorithm.
@@ -27,6 +30,11 @@ The supported `CompressionMethod` should be one of following:
- bz2
- snappy
- none
- auto

+If `CompressionMethod` is not specified, it defaults to `auto`. This means ClickHouse detects the compression method from the suffix of the `URL` parameter automatically. If the suffix matches any of the compression methods listed above, the corresponding compression is applied; otherwise, no compression is enabled.
+
+For example, for the engine expression `URL('http://localhost/test.gzip')` the `gzip` compression method is applied, but for `URL('http://localhost/test.fr')` no compression is enabled, because the suffix `fr` does not match any of the compression methods above.
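A sketch of spelling out both arguments explicitly instead of relying on auto-detection (the endpoint is hypothetical), following the `URL(URL [,Format] [,CompressionMethod])` syntax above:

``` sql
CREATE TABLE url_engine_table (word String, value UInt64)
ENGINE = URL('http://127.0.0.1:12345/', CSV, 'gzip');
```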
## Usage {#using-the-engine-in-the-clickhouse-server}

docs/en/engines/table-engines/special/view.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/view
sidebar_position: 90
sidebar_label: View
---

docs/en/getting-started/example-datasets/amplab-benchmark.md
@@ -1,4 +1,5 @@
---
+slug: /en/getting-started/example-datasets/amplab-benchmark
sidebar_label: AMPLab Big Data Benchmark
description: A benchmark dataset used for comparing the performance of data warehousing solutions.
---

docs/en/getting-started/example-datasets/brown-benchmark.md
@@ -1,10 +1,10 @@
---
slug: /en/getting-started/example-datasets/brown-benchmark
sidebar_label: Brown University Benchmark
description: A new analytical benchmark for machine-generated log data
title: "Brown University Benchmark"
---

# Brown University Benchmark

`MgBench` is a new analytical benchmark for machine-generated log data, by [Andrew Crotty](http://cs.brown.edu/people/acrotty/).

Download the data:

docs/en/getting-started/example-datasets/cell-towers.md
@@ -1,9 +1,9 @@
---
slug: /en/getting-started/example-datasets/cell-towers
sidebar_label: Cell Towers
title: "Cell Towers"
---

# Cell Towers

This dataset is from [OpenCellid](https://www.opencellid.org/), the world's largest open database of cell towers.

As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world, with their geographical coordinates and metadata (country code, network, etc.).

docs/en/getting-started/example-datasets/criteo.md
@@ -1,4 +1,5 @@
---
+slug: /en/getting-started/example-datasets/criteo
sidebar_label: Terabyte Click Logs from Criteo
---

docs/en/getting-started/example-datasets/github-events.md
@@ -1,9 +1,9 @@
---
slug: /en/getting-started/example-datasets/github-events
sidebar_label: GitHub Events
title: "GitHub Events Dataset"
---

# GitHub Events Dataset

The dataset contains all events on GitHub from 2011 to Dec 6, 2020; its size is 3.1 billion records. The download size is 75 GB, and it will require up to 200 GB of space on disk if stored in a table with lz4 compression.

The full dataset description, insights, download instructions and interactive queries are posted [here](https://ghe.clickhouse.tech/).

docs/en/getting-started/example-datasets/menus.md
@@ -1,9 +1,9 @@
---
slug: /en/getting-started/example-datasets/menus
sidebar_label: New York Public Library "What's on the Menu?" Dataset
title: "New York Public Library \"What's on the Menu?\" Dataset"
---

# New York Public Library "What's on the Menu?" Dataset

The dataset was created by the New York Public Library. It contains historical data on the menus of hotels, restaurants and cafes, with the dishes along with their prices.

Source: http://menus.nypl.org/data

docs/en/getting-started/example-datasets/metrica.md
@@ -1,4 +1,5 @@
---
+slug: /en/getting-started/example-datasets/metrica
sidebar_label: Web Analytics Data
description: Dataset consisting of two tables containing anonymized web analytics data with hits and visits
---

docs/en/getting-started/example-datasets/nyc-taxi.md
@@ -1,4 +1,5 @@
---
+slug: /en/getting-started/example-datasets/nyc-taxi
sidebar_label: New York Taxi Data
sidebar_position: 2
description: Data for billions of taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009

docs/en/getting-started/example-datasets/ontime.md
@@ -1,4 +1,5 @@
---
+slug: /en/getting-started/example-datasets/ontime
sidebar_label: OnTime Airline Flight Data
description: Dataset containing the on-time performance of airline flights
---

docs/en/getting-started/example-datasets/opensky.md
@@ -1,10 +1,10 @@
---
slug: /en/getting-started/example-datasets/opensky
sidebar_label: Air Traffic Data
description: The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic.
title: "Crowdsourced air traffic data from The OpenSky Network 2020"
---

# Crowdsourced air traffic data from The OpenSky Network 2020

The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic.

Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd

docs/en/getting-started/example-datasets/recipes.md
@@ -1,9 +1,9 @@
---
slug: /en/getting-started/example-datasets/recipes
sidebar_label: Recipes Dataset
title: "Recipes Dataset"
---

# Recipes Dataset

The RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes; the size is slightly less than 1 GB.

## Download and Unpack the Dataset