Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-21 23:21:59 +00:00)

Merge branch 'kssenii-fix-write-through-cache-logical-error' into stress_s3

Commit: 5a0c5f0542
.gitmodules (vendored, 4 lines changed)
@@ -259,6 +259,10 @@
 [submodule "contrib/minizip-ng"]
 	path = contrib/minizip-ng
 	url = https://github.com/zlib-ng/minizip-ng
+[submodule "contrib/annoy"]
+	path = contrib/annoy
+	url = https://github.com/ClickHouse/annoy.git
+	branch = ClickHouse-master
 [submodule "contrib/qpl"]
 	path = contrib/qpl
 	url = https://github.com/intel/qpl.git
CMakeLists.txt
@@ -164,7 +164,6 @@ if (HAS_RESERVED_IDENTIFIER)
     add_compile_definitions (HAS_RESERVED_IDENTIFIER)
 endif ()

 # If turned `ON`, assumes the user has either the system GTest library or the bundled one.
 option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON)
 option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF)

@@ -200,8 +199,8 @@ endif ()
 option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.")

 if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
-    # Can be lld or ld-lld.
-    if (LINKER_NAME MATCHES "lld$")
+    # Can be lld or ld-lld or lld-13 or /path/to/lld.
+    if (LINKER_NAME MATCHES "lld")
         set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
         set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index")
         message (STATUS "Adding .gdb-index via --gdb-index linker option.")

@@ -246,7 +245,8 @@ else ()
 endif ()

 # Create BuildID when using lld. For other linkers it is created by default.
-if (LINKER_NAME MATCHES "lld$")
+# (NOTE: LINKER_NAME can be either path or name, and in different variants)
+if (LINKER_NAME MATCHES "lld")
     # SHA1 is not cryptographically secure but it is the best what lld is offering.
     set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1")
 endif ()

@@ -601,6 +601,7 @@ if (NATIVE_BUILD_TARGETS
         "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
         "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
         "-DENABLE_CCACHE=${ENABLE_CCACHE}"
+        "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=${ENABLE_CLICKHOUSE_SELF_EXTRACTING}"
         ${CMAKE_SOURCE_DIR}
         WORKING_DIRECTORY "${NATIVE_BUILD_DIR}"
         COMMAND_ECHO STDOUT)
@@ -7,12 +7,8 @@
 # How to install Ninja on Ubuntu:
 # sudo apt-get install ninja-build

-# CLion does not support Ninja
-# You can add your vote on CLion task tracker:
-# https://youtrack.jetbrains.com/issue/CPP-2659
-# https://youtrack.jetbrains.com/issue/CPP-870

-if (NOT DEFINED ENV{CLION_IDE} AND NOT DEFINED ENV{XCODE_IDE})
+if (NOT DEFINED ENV{XCODE_IDE})
     find_program(NINJA_PATH ninja)
     if (NINJA_PATH)
         set(CMAKE_GENERATOR "Ninja" CACHE INTERNAL "")
contrib/CMakeLists.txt (vendored, 2 lines changed)
@@ -159,6 +159,8 @@ add_contrib (s2geometry-cmake s2geometry)
 add_contrib (c-ares-cmake c-ares)
 add_contrib (qpl-cmake qpl)

+add_contrib(annoy-cmake annoy)
+
 # Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
 # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
 # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually,
contrib/annoy (vendored submodule, new)
@@ -0,0 +1 @@
+Subproject commit 9d8a603a4cd252448589e84c9846f94368d5a289
contrib/annoy-cmake/CMakeLists.txt (new file, 16 lines)
@@ -0,0 +1,16 @@
option(ENABLE_ANNOY "Enable Annoy index support" ${ENABLE_LIBRARIES})

if ((NOT ENABLE_ANNOY) OR (SANITIZE STREQUAL "undefined"))
    message (STATUS "Not using annoy")
    return()
endif()

set(ANNOY_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/annoy")
set(ANNOY_SOURCE_DIR "${ANNOY_PROJECT_DIR}/src")

add_library(_annoy INTERFACE)
target_include_directories(_annoy SYSTEM INTERFACE ${ANNOY_SOURCE_DIR})

add_library(ch_contrib::annoy ALIAS _annoy)
target_compile_definitions(_annoy INTERFACE ENABLE_ANNOY)
target_compile_definitions(_annoy INTERFACE ANNOYLIB_MULTITHREADED_BUILD)
@@ -78,6 +78,7 @@ RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
     && apt-get update \
     && apt-get install \
         clang-15 \
         llvm-15 \
         clang-tidy-15 \
         --yes --no-install-recommends \
     && apt-get clean
@@ -26,7 +26,7 @@ ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

 # set non-empty deb_location_url url to create a docker image
 # from debs created by CI build, for example:
-# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852
+# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://..." -t ...
 ARG deb_location_url=""

 # set non-empty single_binary_location_url to create docker image
@@ -12,7 +12,7 @@ stage=${stage:-}
 script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 echo "$script_dir"
 repo_dir=ch
-BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-14_debug_none_bundled_unsplitted_disable_False_binary"}
+BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-14_debug_none_unsplitted_disable_False_binary"}
 BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}

 function clone
@@ -2,7 +2,7 @@
 set -euo pipefail

-CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-14_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}
+CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-14_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
 CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}
@@ -88,13 +88,15 @@ sleep 5
 function run_tests()
 {
     set -x
-    # We can have several additional options so we path them as array because it's
-    # more idiologically correct.
+    # We can have several additional options so we pass them as array because it is more ideologically correct.
     read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"

+    HIGH_LEVEL_COVERAGE=YES
+
     # Use random order in flaky check
     if [ "$NUM_TRIES" -gt "1" ]; then
         ADDITIONAL_OPTIONS+=('--order=random')
+        HIGH_LEVEL_COVERAGE=NO
     fi

     if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
@@ -117,12 +119,17 @@ function run_tests()
         ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_NUM")
         ADDITIONAL_OPTIONS+=('--run-by-hash-total')
         ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_TOTAL")
+        HIGH_LEVEL_COVERAGE=NO
     fi

     if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then
         ADDITIONAL_OPTIONS+=('--db-engine=Ordinary')
     fi

+    if [[ "${HIGH_LEVEL_COVERAGE}" = "YES" ]]; then
+        ADDITIONAL_OPTIONS+=('--report-coverage')
+    fi
+
     set +e
     clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
         --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
@@ -1,4 +1,5 @@
 #!/bin/bash
+# shellcheck disable=SC2024

 set -e -x -a -u

@@ -9,7 +10,7 @@ cd hadoop-3.3.1
 export JAVA_HOME=/usr
 mkdir -p target/test/data
 chown clickhouse ./target/test/data
-sudo -E -u clickhouse bin/mapred minicluster -format -nomr -nnport 12222 &
+sudo -E -u clickhouse bin/mapred minicluster -format -nomr -nnport 12222 >> /test_output/garbage.log 2>&1 &

 while ! nc -z localhost 12222; do
     sleep 1
@@ -105,12 +105,16 @@ EOL

 function stop()
 {
+    local pid
+    # Preserve the pid, since the server can hung after the PID will be deleted.
+    pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
+
     clickhouse stop --do-not-kill && return
     # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces.
     kill -TERM "$(pidof gdb)" ||:
     sleep 5
     echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
-    gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
+    gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
     clickhouse stop --force
 }
@@ -334,7 +338,6 @@ else
     rm -rf /var/lib/clickhouse/*

     # Make BC check more funny by forcing Ordinary engine for system database
     # New version will try to convert it to Atomic on startup
     mkdir /var/lib/clickhouse/metadata
     echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql
@@ -344,16 +347,13 @@ else
     # Start server from previous release
     configure

-    # Avoid "Setting allow_deprecated_database_ordinary is neither a builtin setting..."
-    rm -f /etc/clickhouse-server/users.d/database_ordinary.xml ||:
-    # Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..."
-    rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||:
-
     # Remove s3 related configs to avoid "there is no disk type `cache`"
     rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||:
     rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||:

+    # Disable aggressive cleanup of tmp dirs (it worked incorrectly before 22.8)
+    rm -f /etc/clickhouse-server/config.d/merge_tree_old_dirs_cleanup.xml ||:
+
     start

     clickhouse-client --query="SELECT 'Server version: ', version()"
@@ -476,6 +476,13 @@ else
     [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
 fi

+dmesg -T > /test_output/dmesg.log
+
+# OOM in dmesg -- those are real
+grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \
+    && echo -e 'OOM in dmesg\tFAIL' >> /test_output/test_results.tsv \
+    || echo -e 'No OOM in dmesg\tOK' >> /test_output/test_results.tsv
+
 tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
 mv /var/log/clickhouse-server/stderr.log /test_output/
@@ -497,5 +504,3 @@ for core in core.*; do
     pigz $core
     mv $core.gz /test_output/
 done
-
-dmesg -T > /test_output/dmesg.log
@@ -14,8 +14,6 @@ do
         || curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION.tgz"
 done

-exit 0
-
 tar -xzvf "clickhouse-common-static-$LATEST_VERSION-${ARCH}.tgz" \
     || tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz"
 sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh"
@@ -26,7 +24,7 @@ sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh"

 tar -xzvf "clickhouse-server-$LATEST_VERSION-${ARCH}.tgz" \
     || tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz"
-sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh"
+sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" configure
 sudo /etc/init.d/clickhouse-server start

 tar -xzvf "clickhouse-client-$LATEST_VERSION-${ARCH}.tgz" \
docs/changelogs/v22.3.12.19-lts.md (new file, 25 lines)
@@ -0,0 +1,25 @@
---
sidebar_position: 1
sidebar_label: 2022
---

# 2022 Changelog

### ClickHouse release v22.3.12.19-lts (4a08f8a073b) FIXME as compared to v22.3.11.12-lts (137c5f72657)

#### Build/Testing/Packaging Improvement
* Backported in [#40695](https://github.com/ClickHouse/ClickHouse/issues/40695): Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Backported in [#40160](https://github.com/ClickHouse/ClickHouse/issues/40160): fix HashMethodOneNumber get wrong key value when column is const. [#40020](https://github.com/ClickHouse/ClickHouse/pull/40020) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#40122](https://github.com/ClickHouse/ClickHouse/issues/40122): Fix bug in collectFilesToSkip() by adding correct file extension(.idx or idx2) for indexes to be recalculated, avoid wrong hard links. Fixed [#39896](https://github.com/ClickHouse/ClickHouse/issues/39896). [#40095](https://github.com/ClickHouse/ClickHouse/pull/40095) ([Jianmei Zhang](https://github.com/zhangjmruc)).
* Backported in [#40207](https://github.com/ClickHouse/ClickHouse/issues/40207): Fix insufficient argument check for encryption functions (found by query fuzzer). This closes [#39987](https://github.com/ClickHouse/ClickHouse/issues/39987). [#40194](https://github.com/ClickHouse/ClickHouse/pull/40194) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#40270](https://github.com/ClickHouse/ClickHouse/issues/40270): Fix possible segfault in CapnProto input format. This bug was found and send through ClickHouse bug-bounty [program](https://github.com/ClickHouse/ClickHouse/issues/38986) by *kiojj*. [#40241](https://github.com/ClickHouse/ClickHouse/pull/40241) ([Kruglov Pavel](https://github.com/Avogar)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* fix heap buffer overflow by limiting http chunk size [#40292](https://github.com/ClickHouse/ClickHouse/pull/40292) ([Sema Checherinda](https://github.com/CheSema)).
* Reduce changelog verbosity in CI [#40360](https://github.com/ClickHouse/ClickHouse/pull/40360) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backport the upstream clickhouse_helper.py [#40490](https://github.com/ClickHouse/ClickHouse/pull/40490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
docs/changelogs/v22.6.7.7-stable.md (new file, 17 lines)
@@ -0,0 +1,17 @@
---
sidebar_position: 1
sidebar_label: 2022
---

# 2022 Changelog

### ClickHouse release v22.6.7.7-stable (8eae2af3b9a) FIXME as compared to v22.6.6.16-stable (d2a33ebc822)

#### Build/Testing/Packaging Improvement
* Backported in [#40692](https://github.com/ClickHouse/ClickHouse/issues/40692): Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Backported in [#40531](https://github.com/ClickHouse/ClickHouse/issues/40531): Proxy resolver stop on first successful request to endpoint. [#40353](https://github.com/ClickHouse/ClickHouse/pull/40353) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#40623](https://github.com/ClickHouse/ClickHouse/issues/40623): Fix potential dataloss due to a bug in AWS SDK (https://github.com/aws/aws-sdk-cpp/issues/658). Bug can be triggered only when clickhouse is used over S3. [#40506](https://github.com/ClickHouse/ClickHouse/pull/40506) ([alesapin](https://github.com/alesapin)).
docs/changelogs/v22.7.5.13-stable.md (new file, 23 lines)
@@ -0,0 +1,23 @@
---
sidebar_position: 1
sidebar_label: 2022
---

# 2022 Changelog

### ClickHouse release v22.7.5.13-stable (6f48d2d1f59) FIXME as compared to v22.7.4.16-stable (0b9272f8fdc)

#### Build/Testing/Packaging Improvement
* Backported in [#40693](https://github.com/ClickHouse/ClickHouse/issues/40693): Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Backported in [#40542](https://github.com/ClickHouse/ClickHouse/issues/40542): Fix potential deadlock in WriteBufferFromS3 during task scheduling failure. [#40070](https://github.com/ClickHouse/ClickHouse/pull/40070) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#40450](https://github.com/ClickHouse/ClickHouse/issues/40450): Fix rare bug with column TTL for MergeTree engines family: In case of repeated vertical merge the error `Cannot unlink file ColumnName.bin ... No such file or directory.` could happen. [#40346](https://github.com/ClickHouse/ClickHouse/pull/40346) ([alesapin](https://github.com/alesapin)).
* Backported in [#40532](https://github.com/ClickHouse/ClickHouse/issues/40532): Proxy resolver stop on first successful request to endpoint. [#40353](https://github.com/ClickHouse/ClickHouse/pull/40353) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#40624](https://github.com/ClickHouse/ClickHouse/issues/40624): Fix potential dataloss due to a bug in AWS SDK (https://github.com/aws/aws-sdk-cpp/issues/658). Bug can be triggered only when clickhouse is used over S3. [#40506](https://github.com/ClickHouse/ClickHouse/pull/40506) ([alesapin](https://github.com/alesapin)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* DNSResolver remove AI_V4MAPPED, AI_ALL hints [#40502](https://github.com/ClickHouse/ClickHouse/pull/40502) ([Maksim Kita](https://github.com/kitaisreal)).
docs/changelogs/v22.8.3.13-lts.md (new file, 25 lines)
@@ -0,0 +1,25 @@
---
sidebar_position: 1
sidebar_label: 2022
---

# 2022 Changelog

### ClickHouse release v22.8.3.13-lts (6a15b73faea) FIXME as compared to v22.8.2.11-lts (b4ed6d744ff)

#### Improvement
* Backported in [#40550](https://github.com/ClickHouse/ClickHouse/issues/40550): Improve schema inference cache, respect format settings that can change the schema. [#40414](https://github.com/ClickHouse/ClickHouse/pull/40414) ([Kruglov Pavel](https://github.com/Avogar)).

#### Build/Testing/Packaging Improvement
* Backported in [#40694](https://github.com/ClickHouse/ClickHouse/issues/40694): Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Backported in [#40451](https://github.com/ClickHouse/ClickHouse/issues/40451): Fix rare bug with column TTL for MergeTree engines family: In case of repeated vertical merge the error `Cannot unlink file ColumnName.bin ... No such file or directory.` could happen. [#40346](https://github.com/ClickHouse/ClickHouse/pull/40346) ([alesapin](https://github.com/alesapin)).
* Backported in [#40533](https://github.com/ClickHouse/ClickHouse/issues/40533): Proxy resolver stop on first successful request to endpoint. [#40353](https://github.com/ClickHouse/ClickHouse/pull/40353) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#40625](https://github.com/ClickHouse/ClickHouse/issues/40625): Fix potential dataloss due to a bug in AWS SDK (https://github.com/aws/aws-sdk-cpp/issues/658). Bug can be triggered only when clickhouse is used over S3. [#40506](https://github.com/ClickHouse/ClickHouse/pull/40506) ([alesapin](https://github.com/alesapin)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* DNSResolver remove AI_V4MAPPED, AI_ALL hints [#40502](https://github.com/ClickHouse/ClickHouse/pull/40502) ([Maksim Kita](https://github.com/kitaisreal)).
docs/en/development/adding_test_queries.md
@@ -1,10 +1,11 @@
---
+slug: /en/development/adding_test_queries
sidebar_label: Adding Test Queries
sidebar_position: 63
title: How to add test queries to ClickHouse CI
description: Instructions on how to add a test case to ClickHouse continuous integration
---

# How to add test queries to ClickHouse CI

ClickHouse has hundreds (or even thousands) of features. Every commit gets checked by a complex set of tests containing many thousands of test cases.
docs/en/development/architecture.md
@@ -1,4 +1,5 @@
---
+slug: /en/development/architecture
sidebar_label: Architecture Overview
sidebar_position: 62
---

docs/en/development/browse-code.md
@@ -1,4 +1,5 @@
---
+slug: /en/development/browse-code
sidebar_label: Source Code Browser
sidebar_position: 72
description: Various ways to browse and edit the source code

docs/en/development/build-cross-arm.md
@@ -1,10 +1,10 @@
---
slug: /en/development/build-cross-arm
sidebar_position: 67
title: How to Build ClickHouse on Linux for AARCH64 (ARM64) Architecture
sidebar_label: Build on Linux for AARCH64 (ARM64)
---

# How to Build ClickHouse on Linux for AARCH64 (ARM64) Architecture

If you use an AArch64 machine and want to build ClickHouse for AArch64, build as usual.

If you use an x86_64 machine and want to cross-compile for AArch64, add the following flag to `cmake`: `-DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-aarch64.cmake`
docs/en/development/build-cross-osx.md
@@ -1,9 +1,10 @@
---
slug: /en/development/build-cross-osx
sidebar_position: 66
title: How to Build ClickHouse on Linux for Mac OS X
sidebar_label: Build on Linux for Mac OS X
---

# How to Build ClickHouse on Linux for Mac OS X

This is for the case when you have a Linux machine and want to use it to build a `clickhouse` binary that will run on OS X.
This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](../development/build-osx.md).

docs/en/development/build-cross-riscv.md
@@ -1,10 +1,10 @@
---
slug: /en/development/build-cross-riscv
sidebar_position: 68
title: How to Build ClickHouse on Linux for RISC-V 64 Architecture
sidebar_label: Build on Linux for RISC-V 64
---

# How to Build ClickHouse on Linux for RISC-V 64 Architecture

As of writing (11.11.2021), building for RISC-V is considered highly experimental. Not all features can be enabled.

This is for the case when you have a Linux machine and want to use it to build a `clickhouse` binary that will run on another Linux machine with RISC-V 64 CPU architecture. This is intended for continuous integration checks that run on Linux servers.
|
||||
---
|
||||
slug: /en/development/build-osx
|
||||
sidebar_position: 65
|
||||
sidebar_label: Build on Mac OS X
|
||||
title: How to Build ClickHouse on Mac OS X
|
||||
description: How to build ClickHouse on Mac OS X
|
||||
---
|
||||
|
||||
# How to Build ClickHouse on Mac OS X
|
||||
|
||||
:::info You don't have to build ClickHouse yourself!
|
||||
You can install pre-built ClickHouse as described in [Quick Start](https://clickhouse.com/#quick-start). Follow **macOS (Intel)** or **macOS (Apple silicon)** installation instructions.
|
||||
:::
|
||||
|
@ -1,10 +1,11 @@
|
||||
---
|
||||
slug: /en/development/build
|
||||
sidebar_position: 64
|
||||
sidebar_label: Build on Linux
|
||||
title: How to Build ClickHouse on Linux
|
||||
description: How to build ClickHouse on Linux
|
||||
---
|
||||
|
||||
# How to Build ClickHouse on Linux
|
||||
|
||||
Supported platforms:
|
||||
|
||||
|
docs/en/development/continuous-integration.md
@@ -1,11 +1,11 @@
---
slug: /en/development/continuous-integration
sidebar_position: 62
sidebar_label: Continuous Integration Checks
title: Continuous Integration Checks
description: When you submit a pull request, some automated checks are run for your code by the ClickHouse continuous integration (CI) system
---

# Continuous Integration Checks

When you submit a pull request, some automated checks are run for your code by
the ClickHouse [continuous integration (CI) system](tests.md#test-automation).
This happens after a repository maintainer (someone from the ClickHouse team) has
@@ -54,7 +54,7 @@ the documentation is wrong. Go to the check report and look for `ERROR` and `WARN`
 Check that the description of your pull request conforms to the template
 [PULL_REQUEST_TEMPLATE.md](https://github.com/ClickHouse/ClickHouse/blob/master/.github/PULL_REQUEST_TEMPLATE.md).
 You have to specify a changelog category for your change (e.g., Bug Fix), and
-write a user-readable message describing the change for [CHANGELOG.md](../whats-new/changelog/)
+write a user-readable message describing the change for [CHANGELOG.md](../whats-new/changelog/index.md)


 ## Push To DockerHub
docs/en/development/contrib.md
@@ -1,4 +1,5 @@
---
+slug: /en/development/contrib
sidebar_position: 71
sidebar_label: Third-Party Libraries
description: A list of third-party libraries used

docs/en/development/developer-instruction.md
@@ -1,4 +1,5 @@
---
+slug: /en/development/developer-instruction
sidebar_position: 61
sidebar_label: Getting Started
description: Prerequisites and an overview of how to build ClickHouse
@@ -285,9 +286,4 @@ If you are not interested in functionality provided by third-party libraries, you
    -DENABLE_LIBRARIES=0 -DENABLE_EMBEDDED_COMPILER=0
```

Compressing the binary at the end of the build may take a while; disable the self-extraction feature via
```
    -DENABLE_CLICKHOUSE_SELF_EXTRACTING=0
```

In case of problems with any of the development options, you are on your own!
docs/en/development/index.md (deleted)
@@ -1,8 +0,0 @@
----
-sidebar_label: Development
-sidebar_position: 58
----
-
-# ClickHouse Development
-
-[Original article](https://clickhouse.com/docs/en/development/) <!--hide-->
docs/en/development/integrating_rust_libraries.md
@@ -1,3 +1,6 @@
+---
+slug: /en/development/integrating_rust_libraries
+---
 # Integrating Rust libraries

 Rust library integration will be described based on BLAKE3 hash-function integration.

docs/en/development/style.md
@@ -1,4 +1,5 @@
---
+slug: /en/development/style
sidebar_position: 69
sidebar_label: C++ Guide
description: A list of recommendations regarding coding style, naming convention, formatting and more

docs/en/development/tests.md
@@ -1,11 +1,11 @@
---
slug: /en/development/tests
sidebar_position: 70
sidebar_label: Testing
title: ClickHouse Testing
description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way.
---

# ClickHouse Testing

## Functional Tests

Functional tests are the simplest and most convenient to use. Most ClickHouse features can be tested with functional tests, and they are mandatory to use for every change in ClickHouse code that can be tested that way.
docs/en/engines/database-engines/atomic.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/atomic
sidebar_label: Atomic
sidebar_position: 10
---

docs/en/engines/database-engines/index.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/
toc_folder_title: Database Engines
toc_priority: 27
toc_title: Introduction

docs/en/engines/database-engines/lazy.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/lazy
sidebar_label: Lazy
sidebar_position: 20
---

docs/en/engines/database-engines/materialized-mysql.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/materialized-mysql
sidebar_label: MaterializedMySQL
sidebar_position: 70
---

docs/en/engines/database-engines/materialized-postgresql.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/materialized-postgresql
sidebar_label: MaterializedPostgreSQL
sidebar_position: 60
---
docs/en/engines/database-engines/mysql.md
@@ -1,9 +1,10 @@
---
+slug: /en/engines/database-engines/mysql
sidebar_position: 50
sidebar_label: MySQL
---

# MySQL

Allows you to connect to databases on a remote MySQL server and perform `INSERT` and `SELECT` queries to exchange data between ClickHouse and MySQL.
@@ -98,7 +99,7 @@ mysql> select * from mysql_table;
 Database in ClickHouse, exchanging data with the MySQL server:

 ``` sql
-CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'test', 'my_user', 'user_password')
+CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'test', 'my_user', 'user_password') SETTINGS read_write_timeout=10000, connect_timeout=100;
 ```

 ``` sql
docs/en/engines/database-engines/postgresql.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/postgresql
sidebar_position: 40
sidebar_label: PostgreSQL
---

docs/en/engines/database-engines/replicated.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/replicated
sidebar_position: 30
sidebar_label: Replicated
---

docs/en/engines/database-engines/sqlite.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/database-engines/sqlite
sidebar_position: 55
sidebar_label: SQLite
---

docs/en/engines/table-engines/index.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/
toc_folder_title: Table Engines
toc_priority: 26
toc_title: Introduction
docs/en/engines/table-engines/integrations/ExternalDistributed.md
@@ -1,10 +1,10 @@
---
slug: /en/engines/table-engines/integrations/ExternalDistributed
sidebar_position: 12
sidebar_label: ExternalDistributed
title: ExternalDistributed
---

# ExternalDistributed

The `ExternalDistributed` engine allows you to run `SELECT` queries on data stored on remote MySQL or PostgreSQL servers. It accepts the [MySQL](../../../engines/table-engines/integrations/mysql.md) or [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md) engine as an argument, so sharding is possible.
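A minimal sketch of what such a table can look like (the hosts, database, table and credentials here are placeholders; the exact argument list is defined in the next section):

``` sql
CREATE TABLE distributed_mysql_data
(
    id UInt32,
    name String
)
ENGINE = ExternalDistributed('MySQL', 'mysql-host-1:3306,mysql-host-2:3306', 'database', 'table', 'user', 'password');
```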
## Creating a Table {#creating-a-table}
docs/en/engines/table-engines/integrations/embedded-rocksdb.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/embedded-rocksdb
sidebar_position: 9
sidebar_label: EmbeddedRocksDB
---

docs/en/engines/table-engines/integrations/hdfs.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/hdfs
sidebar_position: 6
sidebar_label: HDFS
---

docs/en/engines/table-engines/integrations/hive.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/hive
sidebar_position: 4
sidebar_label: Hive
---

docs/en/engines/table-engines/integrations/index.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/
sidebar_position: 40
sidebar_label: Integrations
---

docs/en/engines/table-engines/integrations/jdbc.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/jdbc
sidebar_position: 3
sidebar_label: JDBC
---

docs/en/engines/table-engines/integrations/kafka.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/kafka
sidebar_position: 8
sidebar_label: Kafka
---
docs/en/engines/table-engines/integrations/materialized-postgresql.md
@@ -1,10 +1,10 @@
---
slug: /en/engines/table-engines/integrations/materialized-postgresql
sidebar_position: 12
sidebar_label: MaterializedPostgreSQL
title: MaterializedPostgreSQL
---

# MaterializedPostgreSQL

Creates a ClickHouse table with an initial data dump of a PostgreSQL table and starts the replication process, i.e. it executes a background job to apply new changes as they happen on the PostgreSQL table in the remote PostgreSQL database.

If more than one table is required, it is highly recommended to use the [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md) database engine instead of the table engine, and to use the `materialized_postgresql_tables_list` setting, which specifies the tables to be replicated (it will also be possible to add a database `schema`). It is much better in terms of CPU, and results in fewer connections and fewer replication slots inside the remote PostgreSQL database.
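A minimal sketch of that recommended database-level setup (the host, database names and credentials are placeholders):

``` sql
CREATE DATABASE postgres_db
ENGINE = MaterializedPostgreSQL('postgres-host:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3';
```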
docs/en/engines/table-engines/integrations/mongodb.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/mongodb
sidebar_position: 5
sidebar_label: MongoDB
---

docs/en/engines/table-engines/integrations/mysql.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/mysql
sidebar_position: 4
sidebar_label: MySQL
---

docs/en/engines/table-engines/integrations/nats.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/nats
sidebar_position: 14
sidebar_label: NATS
---

docs/en/engines/table-engines/integrations/odbc.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/odbc
sidebar_position: 2
sidebar_label: ODBC
---

docs/en/engines/table-engines/integrations/postgresql.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/postgresql
sidebar_position: 11
sidebar_label: PostgreSQL
---

docs/en/engines/table-engines/integrations/rabbitmq.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/rabbitmq
sidebar_position: 10
sidebar_label: RabbitMQ
---

docs/en/engines/table-engines/integrations/s3.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/s3
sidebar_position: 7
sidebar_label: S3
---

docs/en/engines/table-engines/integrations/sqlite.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/integrations/sqlite
sidebar_position: 7
sidebar_label: SQLite
---

docs/en/engines/table-engines/log-family/index.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/log-family/
sidebar_position: 20
sidebar_label: Log Family
---

docs/en/engines/table-engines/log-family/log.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/log-family/log
toc_priority: 33
toc_title: Log
---

docs/en/engines/table-engines/log-family/stripelog.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/log-family/stripelog
toc_priority: 32
toc_title: StripeLog
---

docs/en/engines/table-engines/log-family/tinylog.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/log-family/tinylog
toc_priority: 34
toc_title: TinyLog
---

docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/aggregatingmergetree
sidebar_position: 60
sidebar_label: AggregatingMergeTree
---
docs/en/engines/table-engines/mergetree-family/annindexes.md (new file, 125 lines)
@@ -0,0 +1,125 @@
# Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex}

The main task of these indexes is to quickly find the nearest neighbors for multidimensional data. An example of such a problem is finding similar pictures (or texts) for a given picture (or text). The problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning), which can be created from data using [UDF](../../../sql-reference/functions/index.md#executable-user-defined-functions).

The following query finds the closest neighbors in N-dimensional space using the L2 (Euclidean) distance:
``` sql
SELECT *
FROM table_name
WHERE L2Distance(Column, Point) < MaxDistance
LIMIT N
```
But it takes some time to execute, because of the long calculation of the distance between the target vector and all other vectors. This is where ANN indexes can help. They store a compact approximation of the search space (e.g. using clustering, search trees, etc.) and are able to compute approximate neighbors quickly.

## Indexes Structure

Approximate Nearest Neighbor Search Indexes (`ANNIndexes`) are similar to skip indexes. They are constructed over granules and determine which of them should be skipped. Compared to skip indexes, ANN indexes use their results not only to skip some group of granules, but also to select particular granules from a set of granules.

`ANNIndexes` are designed to speed up two types of queries:

- ###### Type 1: Where
  ``` sql
  SELECT *
  FROM table_name
  WHERE DistanceFunction(Column, Point) < MaxDistance
  LIMIT N
  ```
- ###### Type 2: Order by
  ``` sql
  SELECT *
  FROM table_name [WHERE ...]
  ORDER BY DistanceFunction(Column, Point)
  LIMIT N
  ```

In these queries, `DistanceFunction` is selected from the [distance functions](../../../sql-reference/functions/distance-functions). `Point` is a known vector (something like `(0.1, 0.1, ... )`). To avoid writing large vectors, use [client parameters](../../../interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters). `MaxDistance` is a float value that bounds the neighborhood.
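For instance, a large target vector can be passed as a client-side parameter instead of being written inline (a sketch; the parameter name `target` is arbitrary):

``` sql
SELECT *
FROM table_name
ORDER BY L2Distance(Column, {target:Array(Float32)})
LIMIT 10
```

The value is then supplied on the command line, e.g. `clickhouse-client --param_target='[0.1, 0.1, 0.1]'`.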
!!! note "Note"
    An ANN index cannot speed up a query that combines both types (`WHERE` + `ORDER BY`); only one of them can be used. All queries must have a `LIMIT`, as the algorithms are used to find a specific number of nearest neighbors.

!!! note "Note"
    Indexes are applied only to queries with a `LIMIT` smaller than the `max_limit_for_ann_queries` setting. This helps to avoid memory overflows in queries with a large limit. The `max_limit_for_ann_queries` setting can be changed if you know you can provide enough memory. The default value is `1000000`.

Both types of queries are handled the same way. The indexes get `n` neighbors (where `n` is taken from the `LIMIT` clause) and work with them. In an `ORDER BY` query they remember the numbers of all granules that contain at least one of the neighbors. In a `WHERE` query they remember only those granules that satisfy the condition.

## Create table with ANNIndex

```sql
CREATE TABLE t
(
    `id` Int64,
    `number` Tuple(Float32, Float32, Float32),
    INDEX x number TYPE annoy GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```

```sql
CREATE TABLE t
(
    `id` Int64,
    `number` Array(Float32),
    INDEX x number TYPE annoy GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```

With a greater `GRANULARITY`, indexes remember the data structure better. `GRANULARITY` indicates how many granules are used to construct the index. The more data is provided for the index, the more of it can be handled by one index, and the higher the chance that, with the right hyperparameters, the index remembers the data structure better. But some indexes can't be built if they don't have enough data, in which case such a granule always participates in the query. For more information, see the descriptions of the indexes.

As the indexes are built only during insertions into the table, `INSERT` and `OPTIMIZE` queries are slower than for an ordinary table. At this stage, indexes remember all the information about the given data. ANNIndexes should be used if you have immutable or rarely changed data and many read requests.

You can create a table with an index that uses a certain algorithm. Currently, only indexes based on the following algorithms are supported:

# Index list
- [Annoy](../../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)

# Annoy {#annoy}
The implementation of the algorithm was taken from [this repository](https://github.com/spotify/annoy).

Short description of the algorithm:
The algorithm recursively divides all space in half with random linear surfaces (lines in 2D, planes in 3D, etc.). Thus it builds a tree of polyhedra and the points they contain. Repeating the operation several times for greater accuracy, it creates a forest.
To find the K nearest neighbours, it goes down through the trees and fills a buffer of the closest points using a priority queue of polyhedra. Then it sorts the buffer and returns the nearest K points.

__Examples__:
```sql
CREATE TABLE t
(
    id Int64,
    number Tuple(Float32, Float32, Float32),
    INDEX x number TYPE annoy(T) GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```

```sql
CREATE TABLE t
(
    id Int64,
    number Array(Float32),
    INDEX x number TYPE annoy(T) GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```
!!! note "Note"
    A table with an array field will work faster, but all arrays **must** have the same length. Use [CONSTRAINT](../../../sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(number) = 256`.
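A sketch of how such a constraint fits into the table definition above (the dimension 256 is arbitrary, and `T` and `N` are placeholders as in the preceding examples):

```sql
CREATE TABLE t
(
    id Int64,
    number Array(Float32),
    CONSTRAINT constraint_name_1 CHECK length(number) = 256,
    INDEX x number TYPE annoy(T) GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```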
Parameter `T` is the number of trees the algorithm will create. The bigger it is, the slower it works (approximately linearly, in both `CREATE` and `SELECT` requests), but the better the accuracy you get (adjusted for randomness).

Annoy supports only `L2Distance`.

In the `SELECT`, the settings (`ann_index_select_query_params`) let you specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes, which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time tradeoff between accuracy and speed.

__Example__:
``` sql
SELECT *
FROM table_name [WHERE ...]
ORDER BY L2Distance(Column, Point)
LIMIT N
SETTINGS ann_index_select_query_params=`search_k=100`
```
docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/collapsingmergetree
sidebar_position: 70
sidebar_label: CollapsingMergeTree
---

docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/custom-partitioning-key
sidebar_position: 30
sidebar_label: Custom Partitioning Key
---

docs/en/engines/table-engines/mergetree-family/graphitemergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/graphitemergetree
sidebar_position: 90
sidebar_label: GraphiteMergeTree
---

docs/en/engines/table-engines/mergetree-family/index.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/
sidebar_position: 10
sidebar_label: MergeTree Family
---

docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/mergetree
sidebar_position: 11
sidebar_label: MergeTree
---
@@ -480,6 +481,10 @@ For example:
 - `NOT startsWith(s, 'test')`
 :::

+## Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex}
+
+In addition to skip indices, there are also [Approximate Nearest Neighbor Search Indexes](../../../engines/table-engines/mergetree-family/annindexes.md).
+
 ## Projections {#projections}
 Projections are like [materialized views](../../../sql-reference/statements/create/view.md#materialized) but defined at the part level. They provide consistency guarantees along with automatic usage in queries.
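As a quick illustrative sketch of the concept (the table and column names here are hypothetical), a projection is declared inside the table definition and maintained per part:

``` sql
CREATE TABLE events
(
    user_id UInt64,
    event_date Date,
    cost Float64,
    PROJECTION cost_by_user
    (
        SELECT user_id, sum(cost)
        GROUP BY user_id
    )
)
ENGINE = MergeTree
ORDER BY (event_date, user_id);
```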
docs/en/engines/table-engines/mergetree-family/replacingmergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/replacingmergetree
sidebar_position: 40
sidebar_label: ReplacingMergeTree
---

docs/en/engines/table-engines/mergetree-family/replication.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/replication
sidebar_position: 20
sidebar_label: Data Replication
---

docs/en/engines/table-engines/mergetree-family/summingmergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/summingmergetree
sidebar_position: 50
sidebar_label: SummingMergeTree
---

docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/mergetree-family/versionedcollapsingmergetree
sidebar_position: 80
sidebar_label: VersionedCollapsingMergeTree
---

docs/en/engines/table-engines/special/buffer.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/buffer
sidebar_position: 120
sidebar_label: Buffer
---

docs/en/engines/table-engines/special/dictionary.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/dictionary
sidebar_position: 20
sidebar_label: Dictionary
---

docs/en/engines/table-engines/special/distributed.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/distributed
sidebar_position: 10
sidebar_label: Distributed
---

docs/en/engines/table-engines/special/external-data.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/external-data
sidebar_position: 130
sidebar_label: External Data
---

docs/en/engines/table-engines/special/file.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/file
sidebar_position: 40
sidebar_label: File
---

docs/en/engines/table-engines/special/generate.md
@@ -1,10 +1,10 @@
---
slug: /en/engines/table-engines/special/generate
sidebar_position: 140
sidebar_label: GenerateRandom
title: "GenerateRandom Table Engine"
---

# GenerateRandom Table Engine

The GenerateRandom table engine produces random data for a given table schema.

Usage examples:

docs/en/engines/table-engines/special/index.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/
sidebar_position: 50
sidebar_label: Special
---

docs/en/engines/table-engines/special/join.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/join
sidebar_position: 70
sidebar_label: Join
---

docs/en/engines/table-engines/special/materializedview.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/materializedview
sidebar_position: 100
sidebar_label: MaterializedView
---

docs/en/engines/table-engines/special/memory.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/memory
sidebar_position: 110
sidebar_label: Memory
---

docs/en/engines/table-engines/special/merge.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/merge
sidebar_position: 30
sidebar_label: Merge
---

docs/en/engines/table-engines/special/null.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/null
sidebar_position: 50
sidebar_label: 'Null'
---

docs/en/engines/table-engines/special/set.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/set
sidebar_position: 60
sidebar_label: Set
---
docs/en/engines/table-engines/special/url.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/url
sidebar_position: 80
sidebar_label: URL
---
@@ -13,6 +14,8 @@ Syntax: `URL(URL [,Format] [,CompressionMethod])`

- The `Format` must be one that ClickHouse can use in `SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see [Formats](../../../interfaces/formats.md#formats).

+    If this argument is not specified, ClickHouse detects the format automatically from the suffix of the `URL` parameter. If the suffix of the `URL` parameter does not match any supported format, it fails to create the table. For example, for the engine expression `URL('http://localhost/test.json')`, the `JSON` format is applied.
+
- `CompressionMethod` indicates whether the HTTP body should be compressed. If compression is enabled, the HTTP packets sent by the URL engine contain the 'Content-Encoding' header to indicate which compression method is used.

    To enable compression, please first make sure the remote HTTP endpoint indicated by the `URL` parameter supports the corresponding compression algorithm.
@@ -27,6 +30,11 @@ The supported `CompressionMethod` should be one of following:
- bz2
- snappy
- none
- auto

+If `CompressionMethod` is not specified, it defaults to `auto`. This means ClickHouse detects the compression method from the suffix of the `URL` parameter automatically. If the suffix matches any of the compression methods listed above, the corresponding compression is applied; otherwise, no compression is enabled.
+
+For example, for the engine expression `URL('http://localhost/test.gzip')` the `gzip` compression method is applied, but for `URL('http://localhost/test.fr')` no compression is enabled, because the suffix `fr` does not match any of the compression methods above.
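A sketch of spelling out both arguments explicitly instead of relying on auto-detection (the endpoint is hypothetical), following the `URL(URL [,Format] [,CompressionMethod])` syntax above:

``` sql
CREATE TABLE url_engine_table (word String, value UInt64)
ENGINE = URL('http://127.0.0.1:12345/', CSV, 'gzip');
```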
## Usage {#using-the-engine-in-the-clickhouse-server}

docs/en/engines/table-engines/special/view.md
@@ -1,4 +1,5 @@
---
+slug: /en/engines/table-engines/special/view
sidebar_position: 90
sidebar_label: View
---

docs/en/getting-started/example-datasets/amplab-benchmark.md
@@ -1,4 +1,5 @@
---
+slug: /en/getting-started/example-datasets/amplab-benchmark
sidebar_label: AMPLab Big Data Benchmark
description: A benchmark dataset used for comparing the performance of data warehousing solutions.
---

docs/en/getting-started/example-datasets/brown-benchmark.md
@@ -1,10 +1,10 @@
---
slug: /en/getting-started/example-datasets/brown-benchmark
sidebar_label: Brown University Benchmark
description: A new analytical benchmark for machine-generated log data
title: "Brown University Benchmark"
---

# Brown University Benchmark

`MgBench` is a new analytical benchmark for machine-generated log data, by [Andrew Crotty](http://cs.brown.edu/people/acrotty/).

Download the data:

docs/en/getting-started/example-datasets/cell-towers.md
@@ -1,9 +1,9 @@
---
slug: /en/getting-started/example-datasets/cell-towers
sidebar_label: Cell Towers
title: "Cell Towers"
---

# Cell Towers

This dataset is from [OpenCellid](https://www.opencellid.org/), the world's largest open database of cell towers.

As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world, with their geographical coordinates and metadata (country code, network, etc.).

docs/en/getting-started/example-datasets/criteo.md
@@ -1,4 +1,5 @@
---
+slug: /en/getting-started/example-datasets/criteo
sidebar_label: Terabyte Click Logs from Criteo
---

docs/en/getting-started/example-datasets/github-events.md
@@ -1,9 +1,9 @@
---
slug: /en/getting-started/example-datasets/github-events
sidebar_label: GitHub Events
title: "GitHub Events Dataset"
---

# GitHub Events Dataset

The dataset contains all events on GitHub from 2011 to Dec 6, 2020; its size is 3.1 billion records. The download size is 75 GB, and it will require up to 200 GB of space on disk if stored in a table with lz4 compression.

The full dataset description, insights, download instructions and interactive queries are posted [here](https://ghe.clickhouse.tech/).

docs/en/getting-started/example-datasets/menus.md
@@ -1,9 +1,9 @@
---
slug: /en/getting-started/example-datasets/menus
sidebar_label: New York Public Library "What's on the Menu?" Dataset
title: "New York Public Library \"What's on the Menu?\" Dataset"
---

# New York Public Library "What's on the Menu?" Dataset

The dataset was created by the New York Public Library. It contains historical data on the menus of hotels, restaurants and cafes, with the dishes along with their prices.

Source: http://menus.nypl.org/data

docs/en/getting-started/example-datasets/metrica.md
@@ -1,4 +1,5 @@
---
+slug: /en/getting-started/example-datasets/metrica
sidebar_label: Web Analytics Data
description: Dataset consisting of two tables containing anonymized web analytics data with hits and visits
---

docs/en/getting-started/example-datasets/nyc-taxi.md
@@ -1,4 +1,5 @@
---
+slug: /en/getting-started/example-datasets/nyc-taxi
sidebar_label: New York Taxi Data
sidebar_position: 2
description: Data for billions of taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009

docs/en/getting-started/example-datasets/ontime.md
@@ -1,4 +1,5 @@
---
+slug: /en/getting-started/example-datasets/ontime
sidebar_label: OnTime Airline Flight Data
description: Dataset containing the on-time performance of airline flights
---

docs/en/getting-started/example-datasets/opensky.md
@@ -1,10 +1,10 @@
---
slug: /en/getting-started/example-datasets/opensky
sidebar_label: Air Traffic Data
description: The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic.
title: "Crowdsourced air traffic data from The OpenSky Network 2020"
---

# Crowdsourced air traffic data from The OpenSky Network 2020

The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic.

Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd

docs/en/getting-started/example-datasets/recipes.md
@@ -1,9 +1,9 @@
---
slug: /en/getting-started/example-datasets/recipes
sidebar_label: Recipes Dataset
title: "Recipes Dataset"
---

# Recipes Dataset

The RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes; the size is slightly less than 1 GB.

## Download and Unpack the Dataset