diff --git a/CMakeLists.txt b/CMakeLists.txt
index f65cc3b3d48..0e9d2c13d45 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,6 +39,8 @@ else()
set(RECONFIGURE_MESSAGE_LEVEL STATUS)
endif()
+enable_language(C CXX ASM)
+
include (cmake/arch.cmake)
include (cmake/target.cmake)
include (cmake/tools.cmake)
diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt
index bd7885bc41b..9d74179902d 100644
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@@ -1,9 +1,9 @@
# This strings autochanged from release_lib.sh:
-SET(VERSION_REVISION 54449)
+SET(VERSION_REVISION 54450)
SET(VERSION_MAJOR 21)
-SET(VERSION_MINOR 4)
+SET(VERSION_MINOR 5)
SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH af2135ef9dc72f16fa4f229b731262c3f0a8bbdc)
-SET(VERSION_DESCRIBE v21.4.1.1-prestable)
-SET(VERSION_STRING 21.4.1.1)
+SET(VERSION_GITHASH 3827789b3d8fd2021952e57e5110343d26daa1a1)
+SET(VERSION_DESCRIBE v21.5.1.1-prestable)
+SET(VERSION_STRING 21.5.1.1)
# end of autochange
diff --git a/cmake/find/base64.cmake b/cmake/find/base64.cmake
index 7427baf9cad..acade11eb2f 100644
--- a/cmake/find/base64.cmake
+++ b/cmake/find/base64.cmake
@@ -1,4 +1,10 @@
-option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES})
+if(ARCH_AMD64 OR ARCH_ARM)
+ option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES})
+elseif(ENABLE_BASE64)
+ message (${RECONFIGURE_MESSAGE_LEVEL} "base64 library is only supported on x86_64 and aarch64")
+ # Force the option off so the `if (NOT ENABLE_BASE64)` guard below returns even when the message level is non-fatal.
+ set (ENABLE_BASE64 0)
+endif()
if (NOT ENABLE_BASE64)
return()
diff --git a/cmake/find/fastops.cmake b/cmake/find/fastops.cmake
index 5ab320bdb7a..1675646654e 100644
--- a/cmake/find/fastops.cmake
+++ b/cmake/find/fastops.cmake
@@ -1,7 +1,7 @@
-if(NOT ARCH_ARM AND NOT OS_FREEBSD AND NOT OS_DARWIN)
+if(ARCH_AMD64 AND NOT OS_FREEBSD AND NOT OS_DARWIN)
option(ENABLE_FASTOPS "Enable fast vectorized mathematical functions library by Mikhail Parakhin" ${ENABLE_LIBRARIES})
elseif(ENABLE_FASTOPS)
- message (${RECONFIGURE_MESSAGE_LEVEL} "Fastops library is not supported on ARM, FreeBSD and Darwin")
+ message (${RECONFIGURE_MESSAGE_LEVEL} "Fastops library is supported on x86_64 only, and not FreeBSD or Darwin")
endif()
if(NOT ENABLE_FASTOPS)
diff --git a/cmake/find/hdfs3.cmake b/cmake/find/hdfs3.cmake
index 7b385f24e1e..3aab2b612ef 100644
--- a/cmake/find/hdfs3.cmake
+++ b/cmake/find/hdfs3.cmake
@@ -1,4 +1,4 @@
-if(NOT ARCH_ARM AND NOT OS_FREEBSD AND NOT APPLE AND USE_PROTOBUF)
+if(NOT ARCH_ARM AND NOT OS_FREEBSD AND NOT APPLE AND USE_PROTOBUF AND NOT ARCH_PPC64LE)
option(ENABLE_HDFS "Enable HDFS" ${ENABLE_LIBRARIES})
elseif(ENABLE_HDFS OR USE_INTERNAL_HDFS3_LIBRARY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use HDFS3 with current configuration")
diff --git a/cmake/find/ldap.cmake b/cmake/find/ldap.cmake
index 369c1e42e8d..0dffa334e73 100644
--- a/cmake/find/ldap.cmake
+++ b/cmake/find/ldap.cmake
@@ -62,6 +62,7 @@ if (NOT OPENLDAP_FOUND AND NOT MISSING_INTERNAL_LDAP_LIBRARY)
if (
( "${_system_name}" STREQUAL "linux" AND "${_system_processor}" STREQUAL "x86_64" ) OR
( "${_system_name}" STREQUAL "linux" AND "${_system_processor}" STREQUAL "aarch64" ) OR
+ ( "${_system_name}" STREQUAL "linux" AND "${_system_processor}" STREQUAL "ppc64le" ) OR
( "${_system_name}" STREQUAL "freebsd" AND "${_system_processor}" STREQUAL "x86_64" ) OR
( "${_system_name}" STREQUAL "darwin" AND "${_system_processor}" STREQUAL "x86_64" )
)
diff --git a/cmake/find/s3.cmake b/cmake/find/s3.cmake
index 1bbf48fd6b0..1b0c652a31a 100644
--- a/cmake/find/s3.cmake
+++ b/cmake/find/s3.cmake
@@ -1,7 +1,7 @@
-if(NOT OS_FREEBSD AND NOT APPLE AND NOT ARCH_ARM)
+if(NOT OS_FREEBSD AND NOT APPLE)
option(ENABLE_S3 "Enable S3" ${ENABLE_LIBRARIES})
elseif(ENABLE_S3 OR USE_INTERNAL_AWS_S3_LIBRARY)
- message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use S3 on ARM, Apple or FreeBSD")
+ message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use S3 on Apple or FreeBSD")
endif()
if(NOT ENABLE_S3)
diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake
index d3a727e9cb8..c1e4d450389 100644
--- a/cmake/linux/default_libs.cmake
+++ b/cmake/linux/default_libs.cmake
@@ -6,7 +6,7 @@ set (DEFAULT_LIBS "-nodefaultlibs")
# We need builtins from Clang's RT even without libcxx - for ubsan+int128.
# See https://bugs.llvm.org/show_bug.cgi?id=16404
if (COMPILER_CLANG AND NOT (CMAKE_CROSSCOMPILING AND ARCH_AARCH64))
- execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
+ execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-libgcc-file-name --rtlib=compiler-rt OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
else ()
set (BUILTINS_LIBRARY "-lgcc")
endif ()
diff --git a/cmake/tools.cmake b/cmake/tools.cmake
index abb11843d59..44fc3b3e530 100644
--- a/cmake/tools.cmake
+++ b/cmake/tools.cmake
@@ -86,8 +86,3 @@ if (LINKER_NAME)
message(STATUS "Using custom linker by name: ${LINKER_NAME}")
endif ()
-if (ARCH_PPC64LE)
- if (COMPILER_CLANG OR (COMPILER_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8))
- message(FATAL_ERROR "Only gcc-8 or higher is supported for powerpc architecture")
- endif ()
-endif ()
diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt
index b9298f59f2b..0759935a7db 100644
--- a/contrib/boost-cmake/CMakeLists.txt
+++ b/contrib/boost-cmake/CMakeLists.txt
@@ -160,6 +160,12 @@ if (NOT EXTERNAL_BOOST_FOUND)
enable_language(ASM)
SET(ASM_OPTIONS "-x assembler-with-cpp")
+ set (SRCS_CONTEXT
+ ${LIBRARY_DIR}/libs/context/src/dummy.cpp
+ ${LIBRARY_DIR}/libs/context/src/execution_context.cpp
+ ${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp
+ )
+
if (SANITIZE AND (SANITIZE STREQUAL "address" OR SANITIZE STREQUAL "thread"))
add_compile_definitions(BOOST_USE_UCONTEXT)
@@ -169,39 +175,34 @@ if (NOT EXTERNAL_BOOST_FOUND)
add_compile_definitions(BOOST_USE_TSAN)
endif()
- set (SRCS_CONTEXT
+ set (SRCS_CONTEXT ${SRCS_CONTEXT}
${LIBRARY_DIR}/libs/context/src/fiber.cpp
${LIBRARY_DIR}/libs/context/src/continuation.cpp
- ${LIBRARY_DIR}/libs/context/src/dummy.cpp
- ${LIBRARY_DIR}/libs/context/src/execution_context.cpp
- ${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp
)
- elseif (ARCH_ARM)
- set (SRCS_CONTEXT
+ endif()
+ if (ARCH_ARM)
+ set (SRCS_CONTEXT ${SRCS_CONTEXT}
${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S
${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_elf_gas.S
${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S
- ${LIBRARY_DIR}/libs/context/src/dummy.cpp
- ${LIBRARY_DIR}/libs/context/src/execution_context.cpp
- ${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp
+ )
+ elseif (ARCH_PPC64LE)
+ set (SRCS_CONTEXT ${SRCS_CONTEXT}
+ ${LIBRARY_DIR}/libs/context/src/asm/jump_ppc64_sysv_elf_gas.S
+ ${LIBRARY_DIR}/libs/context/src/asm/make_ppc64_sysv_elf_gas.S
+ ${LIBRARY_DIR}/libs/context/src/asm/ontop_ppc64_sysv_elf_gas.S
)
elseif(OS_DARWIN)
- set (SRCS_CONTEXT
+ set (SRCS_CONTEXT ${SRCS_CONTEXT}
${LIBRARY_DIR}/libs/context/src/asm/jump_x86_64_sysv_macho_gas.S
${LIBRARY_DIR}/libs/context/src/asm/make_x86_64_sysv_macho_gas.S
${LIBRARY_DIR}/libs/context/src/asm/ontop_x86_64_sysv_macho_gas.S
- ${LIBRARY_DIR}/libs/context/src/dummy.cpp
- ${LIBRARY_DIR}/libs/context/src/execution_context.cpp
- ${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp
)
else()
- set (SRCS_CONTEXT
+ set (SRCS_CONTEXT ${SRCS_CONTEXT}
${LIBRARY_DIR}/libs/context/src/asm/jump_x86_64_sysv_elf_gas.S
${LIBRARY_DIR}/libs/context/src/asm/make_x86_64_sysv_elf_gas.S
${LIBRARY_DIR}/libs/context/src/asm/ontop_x86_64_sysv_elf_gas.S
- ${LIBRARY_DIR}/libs/context/src/dummy.cpp
- ${LIBRARY_DIR}/libs/context/src/execution_context.cpp
- ${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp
)
endif()
diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt
index 90e33dc9f62..a3869478347 100644
--- a/contrib/cctz-cmake/CMakeLists.txt
+++ b/contrib/cctz-cmake/CMakeLists.txt
@@ -97,12 +97,19 @@ if (NOT EXTERNAL_CCTZ_LIBRARY_FOUND OR NOT EXTERNAL_CCTZ_LIBRARY_WORKS)
set(TZ_OBJS ${TZ_OBJS} ${TZ_OBJ})
# https://stackoverflow.com/questions/14776463/compile-and-add-an-object-file-from-a-binary-with-cmake
- add_custom_command(OUTPUT ${TZ_OBJ}
- COMMAND cp ${TZDIR}/${TIMEZONE} ${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}
- COMMAND cd ${CMAKE_CURRENT_BINARY_DIR} && ${OBJCOPY_PATH} -I binary ${OBJCOPY_ARCH_OPTIONS}
+ # objcopy fails to create the embedded timezone object on PPC64LE; wrap the raw file into a relocatable object with the linker (ld or lld, "-r -b binary") instead.
+ if (ARCH_PPC64LE)
+ add_custom_command(OUTPUT ${TZ_OBJ}
+ COMMAND cp ${TZDIR}/${TIMEZONE} ${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}
+ COMMAND cd ${CMAKE_CURRENT_BINARY_DIR} && ${CMAKE_LINKER} -m elf64lppc -r -b binary -o ${TZ_OBJ} ${TIMEZONE_ID}
+ COMMAND rm ${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID})
+ else()
+ add_custom_command(OUTPUT ${TZ_OBJ}
+ COMMAND cp ${TZDIR}/${TIMEZONE} ${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}
+ COMMAND cd ${CMAKE_CURRENT_BINARY_DIR} && ${OBJCOPY_PATH} -I binary ${OBJCOPY_ARCH_OPTIONS}
--rename-section .data=.rodata,alloc,load,readonly,data,contents ${TIMEZONE_ID} ${TZ_OBJ}
- COMMAND rm ${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID})
-
+ COMMAND rm ${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID})
+ endif()
set_source_files_properties(${TZ_OBJ} PROPERTIES EXTERNAL_OBJECT true GENERATED true)
endforeach(TIMEZONE)
diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt
index b8a6474413a..73afa99f1d8 100644
--- a/contrib/jemalloc-cmake/CMakeLists.txt
+++ b/contrib/jemalloc-cmake/CMakeLists.txt
@@ -1,7 +1,7 @@
-if (SANITIZE OR NOT (ARCH_AMD64 OR ARCH_ARM) OR NOT (OS_LINUX OR OS_FREEBSD OR OS_DARWIN))
+if (SANITIZE OR NOT (ARCH_AMD64 OR ARCH_ARM OR ARCH_PPC64LE) OR NOT (OS_LINUX OR OS_FREEBSD OR OS_DARWIN))
if (ENABLE_JEMALLOC)
message (${RECONFIGURE_MESSAGE_LEVEL}
- "jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64 or aarch64 on linux or freebsd.")
+ "jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64 or ppc64le on linux, freebsd or darwin.")
endif()
set (ENABLE_JEMALLOC OFF)
else()
@@ -107,6 +107,8 @@ if (ARCH_AMD64)
set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_x86_64")
elseif (ARCH_ARM)
set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_aarch64")
+elseif (ARCH_PPC64LE)
+ set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_ppc64le")
else ()
message (FATAL_ERROR "internal jemalloc: This arch is not supported")
endif ()
diff --git a/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in
new file mode 100644
index 00000000000..8068861041f
--- /dev/null
+++ b/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -0,0 +1,367 @@
+/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */
+#ifndef JEMALLOC_INTERNAL_DEFS_H_
+#define JEMALLOC_INTERNAL_DEFS_H_
+/*
+ * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
+ * public APIs to be prefixed. This makes it possible, with some care, to use
+ * multiple allocators simultaneously.
+ */
+/* #undef JEMALLOC_PREFIX */
+/* #undef JEMALLOC_CPREFIX */
+
+/*
+ * Define overrides for non-standard allocator-related functions if they are
+ * present on the system.
+ */
+#define JEMALLOC_OVERRIDE___LIBC_CALLOC
+#define JEMALLOC_OVERRIDE___LIBC_FREE
+#define JEMALLOC_OVERRIDE___LIBC_MALLOC
+#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN
+#define JEMALLOC_OVERRIDE___LIBC_REALLOC
+#define JEMALLOC_OVERRIDE___LIBC_VALLOC
+/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */
+
+/*
+ * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
+ * For shared libraries, symbol visibility mechanisms prevent these symbols
+ * from being exported, but for static libraries, naming collisions are a real
+ * possibility.
+ */
+#define JEMALLOC_PRIVATE_NAMESPACE je_
+
+/*
+ * Hyper-threaded CPUs may need a special instruction inside spin loops in
+ * order to yield to another virtual CPU.
+ */
+#define CPU_SPINWAIT
+/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
+#define HAVE_CPU_SPINWAIT 0
+
+/*
+ * Number of significant bits in virtual addresses. This may be less than the
+ * total number of bits in a pointer, e.g. on x64, for which the uppermost 16
+ * bits are the same as bit 47.
+ */
+#define LG_VADDR 64
+
+/* Defined if C11 atomics are available. */
+#define JEMALLOC_C11_ATOMICS 1
+
+/* Defined if GCC __atomic atomics are available. */
+#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
+/* and the 8-bit variant support. */
+#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
+
+/* Defined if GCC __sync atomics are available. */
+#define JEMALLOC_GCC_SYNC_ATOMICS 1
+/* and the 8-bit variant support. */
+#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
+
+/*
+ * Defined if __builtin_clz() and __builtin_clzl() are available.
+ */
+#define JEMALLOC_HAVE_BUILTIN_CLZ
+
+/*
+ * Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
+ */
+/* #undef JEMALLOC_OS_UNFAIR_LOCK */
+
+/* Defined if syscall(2) is usable. */
+#define JEMALLOC_USE_SYSCALL
+
+/*
+ * Defined if secure_getenv(3) is available.
+ */
+// #define JEMALLOC_HAVE_SECURE_GETENV
+
+/*
+ * Defined if issetugid(2) is available.
+ */
+/* #undef JEMALLOC_HAVE_ISSETUGID */
+
+/* Defined if pthread_atfork(3) is available. */
+#define JEMALLOC_HAVE_PTHREAD_ATFORK
+
+/* Defined if pthread_setname_np(3) is available. */
+#define JEMALLOC_HAVE_PTHREAD_SETNAME_NP
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
+ */
+#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
+ */
+#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1
+
+/*
+ * Defined if mach_absolute_time() is available.
+ */
+/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */
+
+/*
+ * Defined if _malloc_thread_cleanup() exists. At least in the case of
+ * FreeBSD, pthread_key_create() allocates, which if used during malloc
+ * bootstrapping will cause recursion into the pthreads library. Therefore, if
+ * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
+ * malloc_tsd.
+ */
+/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */
+
+/*
+ * Defined if threaded initialization is known to be safe on this platform.
+ * Among other things, it must be possible to initialize a mutex without
+ * triggering allocation in order for threaded allocation to be safe.
+ */
+#define JEMALLOC_THREADED_INIT
+
+/*
+ * Defined if the pthreads implementation defines
+ * _pthread_mutex_init_calloc_cb(), in which case the function is used in order
+ * to avoid recursive allocation during mutex initialization.
+ */
+/* #undef JEMALLOC_MUTEX_INIT_CB */
+
+/* Non-empty if the tls_model attribute is supported. */
+#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
+
+/*
+ * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
+ * inline functions.
+ */
+/* #undef JEMALLOC_DEBUG */
+
+/* JEMALLOC_STATS enables statistics calculation. */
+#define JEMALLOC_STATS
+
+/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
+/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
+
+/* JEMALLOC_PROF enables allocation profiling. */
+/* #undef JEMALLOC_PROF */
+
+/* Use libunwind for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBUNWIND */
+
+/* Use libgcc for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBGCC */
+
+/* Use gcc intrinsics for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_GCC */
+
+/*
+ * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
+ * segment (DSS).
+ */
+#define JEMALLOC_DSS
+
+/* Support memory filling (junk/zero). */
+#define JEMALLOC_FILL
+
+/* Support utrace(2)-based tracing. */
+/* #undef JEMALLOC_UTRACE */
+
+/* Support optional abort() on OOM. */
+/* #undef JEMALLOC_XMALLOC */
+
+/* Support lazy locking (avoid locking unless a second thread is launched). */
+/* #undef JEMALLOC_LAZY_LOCK */
+
+/*
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
+ * classes).
+ */
+/* #undef LG_QUANTUM */
+
+/* One page is 2^LG_PAGE bytes. */
+#define LG_PAGE 16
+
+/*
+ * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the
+ * system does not explicitly support huge pages; system calls that require
+ * explicit huge page support are separately configured.
+ */
+#define LG_HUGEPAGE 21
+
+/*
+ * If defined, adjacent virtual memory mappings with identical attributes
+ * automatically coalesce, and they fragment when changes are made to subranges.
+ * This is the normal order of things for mmap()/munmap(), but on Windows
+ * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
+ * mappings do *not* coalesce/fragment.
+ */
+#define JEMALLOC_MAPS_COALESCE
+
+/*
+ * If defined, retain memory for later reuse by default rather than using e.g.
+ * munmap() to unmap freed extents. This is enabled on 64-bit Linux because
+ * common sequences of mmap()/munmap() calls will cause virtual memory map
+ * holes.
+ */
+#define JEMALLOC_RETAIN
+
+/* TLS is used to map arenas and magazine caches to threads. */
+#define JEMALLOC_TLS
+
+/*
+ * Used to mark unreachable code to quiet "end of non-void" compiler warnings.
+ * Don't use this directly; instead use unreachable() from util.h
+ */
+#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
+
+/*
+ * ffs*() functions to use for bitmapping. Don't use these directly; instead,
+ * use ffs_*() from util.h.
+ */
+#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
+#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
+#define JEMALLOC_INTERNAL_FFS __builtin_ffs
+
+/*
+ * popcount*() functions to use for bitmapping.
+ */
+#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
+#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
+
+/*
+ * If defined, explicitly attempt to more uniformly distribute large allocation
+ * pointer alignments across all cache indices.
+ */
+#define JEMALLOC_CACHE_OBLIVIOUS
+
+/*
+ * If defined, enable logging facilities. We make this a configure option to
+ * avoid taking extra branches everywhere.
+ */
+/* #undef JEMALLOC_LOG */
+
+/*
+ * If defined, use readlinkat() (instead of readlink()) to follow
+ * /etc/malloc_conf.
+ */
+/* #undef JEMALLOC_READLINKAT */
+
+/*
+ * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
+ */
+/* #undef JEMALLOC_ZONE */
+
+/*
+ * Methods for determining whether the OS overcommits.
+ * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
+ * /proc/sys/vm.overcommit_memory file.
+ * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
+ */
+/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */
+#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
+
+/* Defined if madvise(2) is available. */
+#define JEMALLOC_HAVE_MADVISE
+
+/*
+ * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
+ * arguments to madvise(2).
+ */
+#define JEMALLOC_HAVE_MADVISE_HUGE
+
+/*
+ * Methods for purging unused pages differ between operating systems.
+ *
+ * madvise(..., MADV_FREE) : This marks pages as being unused, such that they
+ * will be discarded rather than swapped out.
+ * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
+ * defined, this immediately discards pages,
+ * such that new pages will be demand-zeroed if
+ * the address region is later touched;
+ * otherwise this behaves similarly to
+ * MADV_FREE, though typically with higher
+ * system overhead.
+ */
+#define JEMALLOC_PURGE_MADVISE_FREE
+#define JEMALLOC_PURGE_MADVISE_DONTNEED
+#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
+
+/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
+/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
+
+/*
+ * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
+ */
+#define JEMALLOC_MADVISE_DONTDUMP
+
+/*
+ * Defined if transparent huge pages (THPs) are supported via the
+ * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
+ */
+/* #undef JEMALLOC_THP */
+
+/* Define if operating system has alloca.h header. */
+#define JEMALLOC_HAS_ALLOCA_H 1
+
+/* C99 restrict keyword supported. */
+#define JEMALLOC_HAS_RESTRICT 1
+
+/* For use by hash code. */
+/* #undef JEMALLOC_BIG_ENDIAN */
+
+/* sizeof(int) == 2^LG_SIZEOF_INT. */
+#define LG_SIZEOF_INT 2
+
+/* sizeof(long) == 2^LG_SIZEOF_LONG. */
+#define LG_SIZEOF_LONG 3
+
+/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
+#define LG_SIZEOF_LONG_LONG 3
+
+/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
+#define LG_SIZEOF_INTMAX_T 3
+
+/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
+#define JEMALLOC_GLIBC_MALLOC_HOOK
+
+/* glibc memalign hook. */
+#define JEMALLOC_GLIBC_MEMALIGN_HOOK
+
+/* pthread support */
+#define JEMALLOC_HAVE_PTHREAD
+
+/* dlsym() support */
+#define JEMALLOC_HAVE_DLSYM
+
+/* Adaptive mutex support in pthreads. */
+#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
+
+/* GNU specific sched_getcpu support */
+#define JEMALLOC_HAVE_SCHED_GETCPU
+
+/* GNU specific sched_setaffinity support */
+#define JEMALLOC_HAVE_SCHED_SETAFFINITY
+
+/*
+ * If defined, all the features necessary for background threads are present.
+ */
+#define JEMALLOC_BACKGROUND_THREAD 1
+
+/*
+ * If defined, jemalloc symbols are not exported (doesn't work when
+ * JEMALLOC_PREFIX is not defined).
+ */
+/* #undef JEMALLOC_EXPORT */
+
+/* config.malloc_conf options string. */
+#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
+
+/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
+#define JEMALLOC_IS_MALLOC 1
+
+/*
+ * Defined if strerror_r returns char * if _GNU_SOURCE is defined.
+ */
+#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
+
+/* Performs additional safety checks when defined. */
+/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
+
+#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
diff --git a/contrib/libcpuid-cmake/CMakeLists.txt b/contrib/libcpuid-cmake/CMakeLists.txt
index 8c1be50b4e6..9baebb3ba1b 100644
--- a/contrib/libcpuid-cmake/CMakeLists.txt
+++ b/contrib/libcpuid-cmake/CMakeLists.txt
@@ -1,11 +1,9 @@
-if (NOT ARCH_ARM)
+if(ARCH_AMD64)
option (ENABLE_CPUID "Enable libcpuid library (only internal)" ${ENABLE_LIBRARIES})
-endif()
-
-if (ARCH_ARM AND ENABLE_CPUID)
- message (${RECONFIGURE_MESSAGE_LEVEL} "cpuid is not supported on ARM")
+elseif(ENABLE_CPUID)
+ message (${RECONFIGURE_MESSAGE_LEVEL} "libcpuid is only supported on x86_64")
set (ENABLE_CPUID 0)
-endif ()
+endif()
if (NOT ENABLE_CPUID)
add_library (cpuid INTERFACE)
diff --git a/contrib/openldap-cmake/linux_ppc64le/include/lber_types.h b/contrib/openldap-cmake/linux_ppc64le/include/lber_types.h
new file mode 100644
index 00000000000..dbd59430527
--- /dev/null
+++ b/contrib/openldap-cmake/linux_ppc64le/include/lber_types.h
@@ -0,0 +1,63 @@
+/* include/lber_types.h. Generated from lber_types.hin by configure. */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 1998-2020 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+/*
+ * LBER types
+ */
+
+#ifndef _LBER_TYPES_H
+#define _LBER_TYPES_H
+
+#include <ldap_cdefs.h>
+
+LDAP_BEGIN_DECL
+
+/* LBER boolean, enum, integers (32 bits or larger) */
+#define LBER_INT_T int
+
+/* LBER tags (32 bits or larger) */
+#define LBER_TAG_T long
+
+/* LBER socket descriptor */
+#define LBER_SOCKET_T int
+
+/* LBER lengths (32 bits or larger) */
+#define LBER_LEN_T long
+
+/* ------------------------------------------------------------ */
+
+/* booleans, enumerations, and integers */
+typedef LBER_INT_T ber_int_t;
+
+/* signed and unsigned versions */
+typedef signed LBER_INT_T ber_sint_t;
+typedef unsigned LBER_INT_T ber_uint_t;
+
+/* tags */
+typedef unsigned LBER_TAG_T ber_tag_t;
+
+/* "socket" descriptors */
+typedef LBER_SOCKET_T ber_socket_t;
+
+/* lengths */
+typedef unsigned LBER_LEN_T ber_len_t;
+
+/* signed lengths */
+typedef signed LBER_LEN_T ber_slen_t;
+
+LDAP_END_DECL
+
+#endif /* _LBER_TYPES_H */
diff --git a/contrib/openldap-cmake/linux_ppc64le/include/ldap_config.h b/contrib/openldap-cmake/linux_ppc64le/include/ldap_config.h
new file mode 100644
index 00000000000..89f7b40b884
--- /dev/null
+++ b/contrib/openldap-cmake/linux_ppc64le/include/ldap_config.h
@@ -0,0 +1,74 @@
+/* include/ldap_config.h. Generated from ldap_config.hin by configure. */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 1998-2020 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+/*
+ * This file works in conjunction with OpenLDAP configure system.
+ * If you do not like the values below, adjust your configure options.
+ */
+
+#ifndef _LDAP_CONFIG_H
+#define _LDAP_CONFIG_H
+
+/* directory separator */
+#ifndef LDAP_DIRSEP
+#ifndef _WIN32
+#define LDAP_DIRSEP "/"
+#else
+#define LDAP_DIRSEP "\\"
+#endif
+#endif
+
+/* directory for temporary files */
+#if defined(_WIN32)
+# define LDAP_TMPDIR "C:\\." /* we don't have much of a choice */
+#elif defined( _P_tmpdir )
+# define LDAP_TMPDIR _P_tmpdir
+#elif defined( P_tmpdir )
+# define LDAP_TMPDIR P_tmpdir
+#elif defined( _PATH_TMPDIR )
+# define LDAP_TMPDIR _PATH_TMPDIR
+#else
+# define LDAP_TMPDIR LDAP_DIRSEP "tmp"
+#endif
+
+/* directories */
+#ifndef LDAP_BINDIR
+#define LDAP_BINDIR "/tmp/ldap-prefix/bin"
+#endif
+#ifndef LDAP_SBINDIR
+#define LDAP_SBINDIR "/tmp/ldap-prefix/sbin"
+#endif
+#ifndef LDAP_DATADIR
+#define LDAP_DATADIR "/tmp/ldap-prefix/share/openldap"
+#endif
+#ifndef LDAP_SYSCONFDIR
+#define LDAP_SYSCONFDIR "/tmp/ldap-prefix/etc/openldap"
+#endif
+#ifndef LDAP_LIBEXECDIR
+#define LDAP_LIBEXECDIR "/tmp/ldap-prefix/libexec"
+#endif
+#ifndef LDAP_MODULEDIR
+#define LDAP_MODULEDIR "/tmp/ldap-prefix/libexec/openldap"
+#endif
+#ifndef LDAP_RUNDIR
+#define LDAP_RUNDIR "/tmp/ldap-prefix/var"
+#endif
+#ifndef LDAP_LOCALEDIR
+#define LDAP_LOCALEDIR ""
+#endif
+
+
+#endif /* _LDAP_CONFIG_H */
diff --git a/contrib/openldap-cmake/linux_ppc64le/include/ldap_features.h b/contrib/openldap-cmake/linux_ppc64le/include/ldap_features.h
new file mode 100644
index 00000000000..f0cc7c3626f
--- /dev/null
+++ b/contrib/openldap-cmake/linux_ppc64le/include/ldap_features.h
@@ -0,0 +1,61 @@
+/* include/ldap_features.h. Generated from ldap_features.hin by configure. */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 1998-2020 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+/*
+ * LDAP Features
+ */
+
+#ifndef _LDAP_FEATURES_H
+#define _LDAP_FEATURES_H 1
+
+/* OpenLDAP API version macros */
+#define LDAP_VENDOR_VERSION 20501
+#define LDAP_VENDOR_VERSION_MAJOR 2
+#define LDAP_VENDOR_VERSION_MINOR 5
+#define LDAP_VENDOR_VERSION_PATCH X
+
+/*
+** WORK IN PROGRESS!
+**
+** OpenLDAP reentrancy/thread-safeness should be dynamically
+** checked using ldap_get_option().
+**
+** The -lldap implementation is not thread-safe.
+**
+** The -lldap_r implementation is:
+** LDAP_API_FEATURE_THREAD_SAFE (basic thread safety)
+** but may also be:
+** LDAP_API_FEATURE_SESSION_THREAD_SAFE
+** LDAP_API_FEATURE_OPERATION_THREAD_SAFE
+**
+** The preprocessor flag LDAP_API_FEATURE_X_OPENLDAP_THREAD_SAFE
+** can be used to determine if -lldap_r is available at compile
+** time. You must define LDAP_THREAD_SAFE if and only if you
+** link with -lldap_r.
+**
+** If you fail to define LDAP_THREAD_SAFE when linking with
+** -lldap_r or define LDAP_THREAD_SAFE when linking with -lldap,
+** provided header definitions and declarations may be incorrect.
+**
+*/
+
+/* is -lldap_r available or not */
+#define LDAP_API_FEATURE_X_OPENLDAP_THREAD_SAFE 1
+
+/* LDAP v2 Referrals */
+/* #undef LDAP_API_FEATURE_X_OPENLDAP_V2_REFERRALS */
+
+#endif /* _LDAP_FEATURES_H */
diff --git a/contrib/openldap-cmake/linux_ppc64le/include/portable.h b/contrib/openldap-cmake/linux_ppc64le/include/portable.h
new file mode 100644
index 00000000000..2924b6713a4
--- /dev/null
+++ b/contrib/openldap-cmake/linux_ppc64le/include/portable.h
@@ -0,0 +1,1169 @@
+/* include/portable.h. Generated from portable.hin by configure. */
+/* include/portable.hin. Generated from configure.in by autoheader. */
+
+
+/* begin of portable.h.pre */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 1998-2020 The OpenLDAP Foundation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#ifndef _LDAP_PORTABLE_H
+#define _LDAP_PORTABLE_H
+
+/* define this if needed to get reentrant functions */
+#ifndef REENTRANT
+#define REENTRANT 1
+#endif
+#ifndef _REENTRANT
+#define _REENTRANT 1
+#endif
+
+/* define this if needed to get threadsafe functions */
+#ifndef THREADSAFE
+#define THREADSAFE 1
+#endif
+#ifndef _THREADSAFE
+#define _THREADSAFE 1
+#endif
+#ifndef THREAD_SAFE
+#define THREAD_SAFE 1
+#endif
+#ifndef _THREAD_SAFE
+#define _THREAD_SAFE 1
+#endif
+
+#ifndef _SGI_MP_SOURCE
+#define _SGI_MP_SOURCE 1
+#endif
+
+/* end of portable.h.pre */
+
+
+/* Define if building universal (internal helper macro) */
+/* #undef AC_APPLE_UNIVERSAL_BUILD */
+
+/* define to use both <string.h> and <strings.h> */
+/* #undef BOTH_STRINGS_H */
+
+/* define if cross compiling */
+/* #undef CROSS_COMPILING */
+
+/* set to the number of arguments ctime_r() expects */
+#define CTIME_R_NARGS 2
+
+/* define if toupper() requires islower() */
+/* #undef C_UPPER_LOWER */
+
+/* define if sys_errlist is not declared in stdio.h or errno.h */
+/* #undef DECL_SYS_ERRLIST */
+
+/* define to enable slapi library */
+/* #undef ENABLE_SLAPI */
+
+/* defined to be the EXE extension */
+#define EXEEXT ""
+
+/* set to the number of arguments gethostbyaddr_r() expects */
+#define GETHOSTBYADDR_R_NARGS 8
+
+/* set to the number of arguments gethostbyname_r() expects */
+#define GETHOSTBYNAME_R_NARGS 6
+
+/* Define to 1 if `TIOCGWINSZ' requires <sys/ioctl.h>. */
+#define GWINSZ_IN_SYS_IOCTL 1
+
+/* define if you have AIX security lib */
+/* #undef HAVE_AIX_SECURITY */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_ARPA_INET_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_ARPA_NAMESER_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_ASSERT_H 1
+
+/* Define to 1 if you have the `bcopy' function. */
+#define HAVE_BCOPY 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_BITS_TYPES_H 1
+
+/* Define to 1 if you have the `chroot' function. */
+#define HAVE_CHROOT 1
+
+/* Define to 1 if you have the `closesocket' function. */
+/* #undef HAVE_CLOSESOCKET */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_CONIO_H */
+
+/* define if crypt(3) is available */
+/* #undef HAVE_CRYPT */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_CRYPT_H */
+
+/* define if crypt_r() is also available */
+/* #undef HAVE_CRYPT_R */
+
+/* Define to 1 if you have the `ctime_r' function. */
+#define HAVE_CTIME_R 1
+
+/* define if you have Cyrus SASL */
+/* #undef HAVE_CYRUS_SASL */
+
+/* define if your system supports /dev/poll */
+/* #undef HAVE_DEVPOLL */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_DIRECT_H */
+
+/* Define to 1 if you have the <dirent.h> header file, and it defines `DIR'.
+ */
+#define HAVE_DIRENT_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */
+/* #undef HAVE_DOPRNT */
+
+/* define if system uses EBCDIC instead of ASCII */
+/* #undef HAVE_EBCDIC */
+
+/* Define to 1 if you have the `endgrent' function. */
+#define HAVE_ENDGRENT 1
+
+/* Define to 1 if you have the `endpwent' function. */
+#define HAVE_ENDPWENT 1
+
+/* define if your system supports epoll */
+#define HAVE_EPOLL 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_ERRNO_H 1
+
+/* Define to 1 if you have the `fcntl' function. */
+#define HAVE_FCNTL 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_FCNTL_H 1
+
+/* define if you actually have FreeBSD fetch(3) */
+/* #undef HAVE_FETCH */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_FILIO_H */
+
+/* Define to 1 if you have the `flock' function. */
+#define HAVE_FLOCK 1
+
+/* Define to 1 if you have the `fstat' function. */
+#define HAVE_FSTAT 1
+
+/* Define to 1 if you have the `gai_strerror' function. */
+#define HAVE_GAI_STRERROR 1
+
+/* Define to 1 if you have the `getaddrinfo' function. */
+#define HAVE_GETADDRINFO 1
+
+/* Define to 1 if you have the `getdtablesize' function. */
+#define HAVE_GETDTABLESIZE 1
+
+/* Define to 1 if you have the `geteuid' function. */
+#define HAVE_GETEUID 1
+
+/* Define to 1 if you have the `getgrgid' function. */
+#define HAVE_GETGRGID 1
+
+/* Define to 1 if you have the `gethostbyaddr_r' function. */
+#define HAVE_GETHOSTBYADDR_R 1
+
+/* Define to 1 if you have the `gethostbyname_r' function. */
+#define HAVE_GETHOSTBYNAME_R 1
+
+/* Define to 1 if you have the `gethostname' function. */
+#define HAVE_GETHOSTNAME 1
+
+/* Define to 1 if you have the `getnameinfo' function. */
+#define HAVE_GETNAMEINFO 1
+
+/* Define to 1 if you have the `getopt' function. */
+#define HAVE_GETOPT 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_GETOPT_H 1
+
+/* Define to 1 if you have the `getpassphrase' function. */
+/* #undef HAVE_GETPASSPHRASE */
+
+/* Define to 1 if you have the `getpeereid' function. */
+/* #undef HAVE_GETPEEREID */
+
+/* Define to 1 if you have the `getpeerucred' function. */
+/* #undef HAVE_GETPEERUCRED */
+
+/* Define to 1 if you have the `getpwnam' function. */
+#define HAVE_GETPWNAM 1
+
+/* Define to 1 if you have the `getpwuid' function. */
+#define HAVE_GETPWUID 1
+
+/* Define to 1 if you have the `getspnam' function. */
+#define HAVE_GETSPNAM 1
+
+/* Define to 1 if you have the `gettimeofday' function. */
+#define HAVE_GETTIMEOFDAY 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_GMP_H */
+
+/* Define to 1 if you have the `gmtime_r' function. */
+#define HAVE_GMTIME_R 1
+
+/* define if you have GNUtls */
+/* #undef HAVE_GNUTLS */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_GNUTLS_GNUTLS_H */
+
+/* if you have GNU Pth */
+/* #undef HAVE_GNU_PTH */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_GRP_H 1
+
+/* Define to 1 if you have the `hstrerror' function. */
+#define HAVE_HSTRERROR 1
+
+/* define if inet_aton(3) is available */
+#define HAVE_INET_ATON 1
+
+/* Define to 1 if you have the `inet_ntoa_b' function. */
+/* #undef HAVE_INET_NTOA_B */
+
+/* Define to 1 if you have the `inet_ntop' function. */
+#define HAVE_INET_NTOP 1
+
+/* Define to 1 if you have the `initgroups' function. */
+#define HAVE_INITGROUPS 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the `ioctl' function. */
+#define HAVE_IOCTL 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_IO_H */
+
+/* define if your system supports kqueue */
+/* #undef HAVE_KQUEUE */
+
+/* Define to 1 if you have the `gen' library (-lgen). */
+/* #undef HAVE_LIBGEN */
+
+/* Define to 1 if you have the `gmp' library (-lgmp). */
+/* #undef HAVE_LIBGMP */
+
+/* Define to 1 if you have the `inet' library (-linet). */
+/* #undef HAVE_LIBINET */
+
+/* define if you have libtool -ltdl */
+/* #undef HAVE_LIBLTDL */
+
+/* Define to 1 if you have the `net' library (-lnet). */
+/* #undef HAVE_LIBNET */
+
+/* Define to 1 if you have the `nsl' library (-lnsl). */
+/* #undef HAVE_LIBNSL */
+
+/* Define to 1 if you have the `nsl_s' library (-lnsl_s). */
+/* #undef HAVE_LIBNSL_S */
+
+/* Define to 1 if you have the `socket' library (-lsocket). */
+/* #undef HAVE_LIBSOCKET */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LIBUTIL_H */
+
+/* Define to 1 if you have the `V3' library (-lV3). */
+/* #undef HAVE_LIBV3 */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_LIMITS_H 1
+
+/* if you have LinuxThreads */
+/* #undef HAVE_LINUX_THREADS */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_LOCALE_H 1
+
+/* Define to 1 if you have the `localtime_r' function. */
+#define HAVE_LOCALTIME_R 1
+
+/* Define to 1 if you have the `lockf' function. */
+#define HAVE_LOCKF 1
+
+/* Define to 1 if the system has the type `long long'. */
+#define HAVE_LONG_LONG 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_LTDL_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_MALLOC_H 1
+
+/* Define to 1 if you have the `memcpy' function. */
+#define HAVE_MEMCPY 1
+
+/* Define to 1 if you have the `memmove' function. */
+#define HAVE_MEMMOVE 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the `memrchr' function. */
+#define HAVE_MEMRCHR 1
+
+/* Define to 1 if you have the `mkstemp' function. */
+#define HAVE_MKSTEMP 1
+
+/* Define to 1 if you have the `mktemp' function. */
+#define HAVE_MKTEMP 1
+
+/* define this if you have mkversion */
+#define HAVE_MKVERSION 1
+
+/* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */
+/* #undef HAVE_NDIR_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_NETINET_TCP_H 1
+
+/* define if strerror_r returns char* instead of int */
+/* #undef HAVE_NONPOSIX_STRERROR_R */
+
+/* if you have NT Event Log */
+/* #undef HAVE_NT_EVENT_LOG */
+
+/* if you have NT Service Manager */
+/* #undef HAVE_NT_SERVICE_MANAGER */
+
+/* if you have NT Threads */
+/* #undef HAVE_NT_THREADS */
+
+/* define if you have OpenSSL */
+#define HAVE_OPENSSL 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_OPENSSL_BN_H 1
+
+/* define if you have OpenSSL with CRL checking capability */
+#define HAVE_OPENSSL_CRL 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_OPENSSL_CRYPTO_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_OPENSSL_SSL_H 1
+
+/* Define to 1 if you have the `pipe' function. */
+#define HAVE_PIPE 1
+
+/* Define to 1 if you have the `poll' function. */
+#define HAVE_POLL 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_POLL_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_PROCESS_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_PSAP_H */
+
+/* define to pthreads API spec revision */
+#define HAVE_PTHREADS 10
+
+/* define if you have pthread_detach function */
+#define HAVE_PTHREAD_DETACH 1
+
+/* Define to 1 if you have the `pthread_getconcurrency' function. */
+#define HAVE_PTHREAD_GETCONCURRENCY 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_PTHREAD_H 1
+
+/* Define to 1 if you have the `pthread_kill' function. */
+#define HAVE_PTHREAD_KILL 1
+
+/* Define to 1 if you have the `pthread_kill_other_threads_np' function. */
+/* #undef HAVE_PTHREAD_KILL_OTHER_THREADS_NP */
+
+/* define if you have pthread_rwlock_destroy function */
+#define HAVE_PTHREAD_RWLOCK_DESTROY 1
+
+/* Define to 1 if you have the `pthread_setconcurrency' function. */
+#define HAVE_PTHREAD_SETCONCURRENCY 1
+
+/* Define to 1 if you have the `pthread_yield' function. */
+#define HAVE_PTHREAD_YIELD 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_PTH_H */
+
+/* Define to 1 if the system has the type `ptrdiff_t'. */
+#define HAVE_PTRDIFF_T 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_PWD_H 1
+
+/* Define to 1 if you have the `read' function. */
+#define HAVE_READ 1
+
+/* Define to 1 if you have the `recv' function. */
+#define HAVE_RECV 1
+
+/* Define to 1 if you have the `recvfrom' function. */
+#define HAVE_RECVFROM 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_REGEX_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_RESOLV_H */
+
+/* define if you have res_query() */
+/* #undef HAVE_RES_QUERY */
+
+/* define if OpenSSL needs RSAref */
+/* #undef HAVE_RSAREF */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SASL_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SASL_SASL_H */
+
+/* define if your SASL library has sasl_version() */
+/* #undef HAVE_SASL_VERSION */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SCHED_H 1
+
+/* Define to 1 if you have the `sched_yield' function. */
+#define HAVE_SCHED_YIELD 1
+
+/* Define to 1 if you have the `send' function. */
+#define HAVE_SEND 1
+
+/* Define to 1 if you have the `sendmsg' function. */
+#define HAVE_SENDMSG 1
+
+/* Define to 1 if you have the `sendto' function. */
+#define HAVE_SENDTO 1
+
+/* Define to 1 if you have the `setegid' function. */
+#define HAVE_SETEGID 1
+
+/* Define to 1 if you have the `seteuid' function. */
+#define HAVE_SETEUID 1
+
+/* Define to 1 if you have the `setgid' function. */
+#define HAVE_SETGID 1
+
+/* Define to 1 if you have the `setpwfile' function. */
+/* #undef HAVE_SETPWFILE */
+
+/* Define to 1 if you have the `setsid' function. */
+#define HAVE_SETSID 1
+
+/* Define to 1 if you have the `setuid' function. */
+#define HAVE_SETUID 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SGTTY_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SHADOW_H */
+
+/* Define to 1 if you have the `sigaction' function. */
+#define HAVE_SIGACTION 1
+
+/* Define to 1 if you have the `signal' function. */
+#define HAVE_SIGNAL 1
+
+/* Define to 1 if you have the `sigset' function. */
+#define HAVE_SIGSET 1
+
+/* define if you have -lslp */
+/* #undef HAVE_SLP */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SLP_H */
+
+/* Define to 1 if you have the `snprintf' function. */
+#define HAVE_SNPRINTF 1
+
+/* if you have spawnlp() */
+/* #undef HAVE_SPAWNLP */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SQLEXT_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SQL_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_STDDEF_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the `strdup' function. */
+#define HAVE_STRDUP 1
+
+/* Define to 1 if you have the `strerror' function. */
+#define HAVE_STRERROR 1
+
+/* Define to 1 if you have the `strerror_r' function. */
+#define HAVE_STRERROR_R 1
+
+/* Define to 1 if you have the `strftime' function. */
+#define HAVE_STRFTIME 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the `strpbrk' function. */
+#define HAVE_STRPBRK 1
+
+/* Define to 1 if you have the `strrchr' function. */
+#define HAVE_STRRCHR 1
+
+/* Define to 1 if you have the `strsep' function. */
+#define HAVE_STRSEP 1
+
+/* Define to 1 if you have the `strspn' function. */
+#define HAVE_STRSPN 1
+
+/* Define to 1 if you have the `strstr' function. */
+#define HAVE_STRSTR 1
+
+/* Define to 1 if you have the `strtol' function. */
+#define HAVE_STRTOL 1
+
+/* Define to 1 if you have the `strtoll' function. */
+#define HAVE_STRTOLL 1
+
+/* Define to 1 if you have the `strtoq' function. */
+#define HAVE_STRTOQ 1
+
+/* Define to 1 if you have the `strtoul' function. */
+#define HAVE_STRTOUL 1
+
+/* Define to 1 if you have the `strtoull' function. */
+#define HAVE_STRTOULL 1
+
+/* Define to 1 if you have the `strtouq' function. */
+#define HAVE_STRTOUQ 1
+
+/* Define to 1 if `msg_accrightslen' is a member of `struct msghdr'. */
+/* #undef HAVE_STRUCT_MSGHDR_MSG_ACCRIGHTSLEN */
+
+/* Define to 1 if `msg_control' is a member of `struct msghdr'. */
+#define HAVE_STRUCT_MSGHDR_MSG_CONTROL 1
+
+/* Define to 1 if `pw_gecos' is a member of `struct passwd'. */
+#define HAVE_STRUCT_PASSWD_PW_GECOS 1
+
+/* Define to 1 if `pw_passwd' is a member of `struct passwd'. */
+#define HAVE_STRUCT_PASSWD_PW_PASSWD 1
+
+/* Define to 1 if `st_blksize' is a member of `struct stat'. */
+#define HAVE_STRUCT_STAT_ST_BLKSIZE 1
+
+/* Define to 1 if `st_fstype' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_FSTYPE */
+
+/* define to 1 if st_fstype is char * */
+/* #undef HAVE_STRUCT_STAT_ST_FSTYPE_CHAR */
+
+/* define to 1 if st_fstype is int */
+/* #undef HAVE_STRUCT_STAT_ST_FSTYPE_INT */
+
+/* Define to 1 if `st_vfstype' is a member of `struct stat'. */
+/* #undef HAVE_STRUCT_STAT_ST_VFSTYPE */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYNCH_H */
+
+/* Define to 1 if you have the `sysconf' function. */
+#define HAVE_SYSCONF 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYSEXITS_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYSLOG_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_DEVPOLL_H */
+
+/* Define to 1 if you have the <sys/dir.h> header file, and it defines `DIR'.
+ */
+/* #undef HAVE_SYS_DIR_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_EPOLL_H 1
+
+/* define if you actually have sys_errlist in your libs */
+#define HAVE_SYS_ERRLIST 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_ERRNO_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_EVENT_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_FILE_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_FILIO_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_FSTYP_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_IOCTL_H 1
+
+/* Define to 1 if you have the <sys/ndir.h> header file, and it defines `DIR'.
+ */
+/* #undef HAVE_SYS_NDIR_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_PARAM_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_POLL_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_PRIVGRP_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_RESOURCE_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_SELECT_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_SOCKET_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_SYSLOG_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_UCRED_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_UIO_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_SYS_UN_H 1
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_UUID_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_SYS_VMOUNT_H */
+
+/* Define to 1 if you have <sys/wait.h> that is POSIX.1 compatible. */
+#define HAVE_SYS_WAIT_H 1
+
+/* define if you have -lwrap */
+/* #undef HAVE_TCPD */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_TCPD_H */
+
+/* Define to 1 if you have the header file. */
+#define HAVE_TERMIOS_H 1
+
+/* if you have Solaris LWP (thr) package */
+/* #undef HAVE_THR */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_THREAD_H */
+
+/* Define to 1 if you have the `thr_getconcurrency' function. */
+/* #undef HAVE_THR_GETCONCURRENCY */
+
+/* Define to 1 if you have the `thr_setconcurrency' function. */
+/* #undef HAVE_THR_SETCONCURRENCY */
+
+/* Define to 1 if you have the `thr_yield' function. */
+/* #undef HAVE_THR_YIELD */
+
+/* define if you have TLS */
+#define HAVE_TLS 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_UTIME_H 1
+
+/* define if you have uuid_generate() */
+/* #undef HAVE_UUID_GENERATE */
+
+/* define if you have uuid_to_str() */
+/* #undef HAVE_UUID_TO_STR */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_UUID_UUID_H */
+
+/* Define to 1 if you have the `vprintf' function. */
+#define HAVE_VPRINTF 1
+
+/* Define to 1 if you have the `vsnprintf' function. */
+#define HAVE_VSNPRINTF 1
+
+/* Define to 1 if you have the `wait4' function. */
+#define HAVE_WAIT4 1
+
+/* Define to 1 if you have the `waitpid' function. */
+#define HAVE_WAITPID 1
+
+/* define if you have winsock */
+/* #undef HAVE_WINSOCK */
+
+/* define if you have winsock2 */
+/* #undef HAVE_WINSOCK2 */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_WINSOCK2_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_WINSOCK_H */
+
+/* Define to 1 if you have the header file. */
+/* #undef HAVE_WIREDTIGER_H */
+
+/* Define to 1 if you have the `write' function. */
+#define HAVE_WRITE 1
+
+/* define if select implicitly yields */
+#define HAVE_YIELDING_SELECT 1
+
+/* Define to 1 if you have the `_vsnprintf' function. */
+/* #undef HAVE__VSNPRINTF */
+
+/* define to 32-bit or greater integer type */
+#define LBER_INT_T int
+
+/* define to large integer type */
+#define LBER_LEN_T long
+
+/* define to socket descriptor type */
+#define LBER_SOCKET_T int
+
+/* define to large integer type */
+#define LBER_TAG_T long
+
+/* define to 1 if library is thread safe */
+#define LDAP_API_FEATURE_X_OPENLDAP_THREAD_SAFE 1
+
+/* define if LDAP v2 referrals are supported */
+/* #undef LDAP_API_FEATURE_X_OPENLDAP_V2_REFERRALS */
+
+/* define this to add debugging code */
+/* #undef LDAP_DEBUG */
+
+/* define if LDAP libs are dynamic */
+/* #undef LDAP_LIBS_DYNAMIC */
+
+/* define to support PF_INET6 */
+#define LDAP_PF_INET6 1
+
+/* define to support PF_LOCAL */
+#define LDAP_PF_LOCAL 1
+
+/* define this to add SLAPI code */
+/* #undef LDAP_SLAPI */
+
+/* define this to add syslog code */
+/* #undef LDAP_SYSLOG */
+
+/* Version */
+#define LDAP_VENDOR_VERSION 20501
+
+/* Major */
+#define LDAP_VENDOR_VERSION_MAJOR 2
+
+/* Minor */
+#define LDAP_VENDOR_VERSION_MINOR 5
+
+/* Patch */
+#define LDAP_VENDOR_VERSION_PATCH X
+
+/* Define to the sub-directory where libtool stores uninstalled libraries. */
+#define LT_OBJDIR ".libs/"
+
+/* define if memcmp is not 8-bit clean or is otherwise broken */
+/* #undef NEED_MEMCMP_REPLACEMENT */
+
+/* define if you have (or want) no threads */
+/* #undef NO_THREADS */
+
+/* define to use the original debug style */
+/* #undef OLD_DEBUG */
+
+/* Package */
+#define OPENLDAP_PACKAGE "OpenLDAP"
+
+/* Version */
+#define OPENLDAP_VERSION "2.5.X"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT ""
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME ""
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING ""
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME ""
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION ""
+
+/* define if sched_yield yields the entire process */
+/* #undef REPLACE_BROKEN_YIELD */
+
+/* Define as the return type of signal handlers (`int' or `void'). */
+#define RETSIGTYPE void
+
+/* Define to the type of arg 1 for `select'. */
+#define SELECT_TYPE_ARG1 int
+
+/* Define to the type of args 2, 3 and 4 for `select'. */
+#define SELECT_TYPE_ARG234 (fd_set *)
+
+/* Define to the type of arg 5 for `select'. */
+#define SELECT_TYPE_ARG5 (struct timeval *)
+
+/* The size of `int', as computed by sizeof. */
+#define SIZEOF_INT 4
+
+/* The size of `long', as computed by sizeof. */
+#define SIZEOF_LONG 8
+
+/* The size of `long long', as computed by sizeof. */
+#define SIZEOF_LONG_LONG 8
+
+/* The size of `short', as computed by sizeof. */
+#define SIZEOF_SHORT 2
+
+/* The size of `wchar_t', as computed by sizeof. */
+#define SIZEOF_WCHAR_T 4
+
+/* define to support per-object ACIs */
+/* #undef SLAPD_ACI_ENABLED */
+
+/* define to support LDAP Async Metadirectory backend */
+/* #undef SLAPD_ASYNCMETA */
+
+/* define to support cleartext passwords */
+/* #undef SLAPD_CLEARTEXT */
+
+/* define to support crypt(3) passwords */
+/* #undef SLAPD_CRYPT */
+
+/* define to support DNS SRV backend */
+/* #undef SLAPD_DNSSRV */
+
+/* define to support LDAP backend */
+/* #undef SLAPD_LDAP */
+
+/* define to support MDB backend */
+/* #undef SLAPD_MDB */
+
+/* define to support LDAP Metadirectory backend */
+/* #undef SLAPD_META */
+
+/* define to support modules */
+/* #undef SLAPD_MODULES */
+
+/* dynamically linked module */
+#define SLAPD_MOD_DYNAMIC 2
+
+/* statically linked module */
+#define SLAPD_MOD_STATIC 1
+
+/* define to support cn=Monitor backend */
+/* #undef SLAPD_MONITOR */
+
+/* define to support NDB backend */
+/* #undef SLAPD_NDB */
+
+/* define to support NULL backend */
+/* #undef SLAPD_NULL */
+
+/* define for In-Directory Access Logging overlay */
+/* #undef SLAPD_OVER_ACCESSLOG */
+
+/* define for Audit Logging overlay */
+/* #undef SLAPD_OVER_AUDITLOG */
+
+/* define for Automatic Certificate Authority overlay */
+/* #undef SLAPD_OVER_AUTOCA */
+
+/* define for Collect overlay */
+/* #undef SLAPD_OVER_COLLECT */
+
+/* define for Attribute Constraint overlay */
+/* #undef SLAPD_OVER_CONSTRAINT */
+
+/* define for Dynamic Directory Services overlay */
+/* #undef SLAPD_OVER_DDS */
+
+/* define for Dereference overlay */
+/* #undef SLAPD_OVER_DEREF */
+
+/* define for Dynamic Group overlay */
+/* #undef SLAPD_OVER_DYNGROUP */
+
+/* define for Dynamic List overlay */
+/* #undef SLAPD_OVER_DYNLIST */
+
+/* define for Reverse Group Membership overlay */
+/* #undef SLAPD_OVER_MEMBEROF */
+
+/* define for Password Policy overlay */
+/* #undef SLAPD_OVER_PPOLICY */
+
+/* define for Proxy Cache overlay */
+/* #undef SLAPD_OVER_PROXYCACHE */
+
+/* define for Referential Integrity overlay */
+/* #undef SLAPD_OVER_REFINT */
+
+/* define for Return Code overlay */
+/* #undef SLAPD_OVER_RETCODE */
+
+/* define for Rewrite/Remap overlay */
+/* #undef SLAPD_OVER_RWM */
+
+/* define for Sequential Modify overlay */
+/* #undef SLAPD_OVER_SEQMOD */
+
+/* define for ServerSideSort/VLV overlay */
+/* #undef SLAPD_OVER_SSSVLV */
+
+/* define for Syncrepl Provider overlay */
+/* #undef SLAPD_OVER_SYNCPROV */
+
+/* define for Translucent Proxy overlay */
+/* #undef SLAPD_OVER_TRANSLUCENT */
+
+/* define for Attribute Uniqueness overlay */
+/* #undef SLAPD_OVER_UNIQUE */
+
+/* define for Value Sorting overlay */
+/* #undef SLAPD_OVER_VALSORT */
+
+/* define to support PASSWD backend */
+/* #undef SLAPD_PASSWD */
+
+/* define to support PERL backend */
+/* #undef SLAPD_PERL */
+
+/* define to support relay backend */
+/* #undef SLAPD_RELAY */
+
+/* define to support reverse lookups */
+/* #undef SLAPD_RLOOKUPS */
+
+/* define to support SHELL backend */
+/* #undef SLAPD_SHELL */
+
+/* define to support SOCK backend */
+/* #undef SLAPD_SOCK */
+
+/* define to support SASL passwords */
+/* #undef SLAPD_SPASSWD */
+
+/* define to support SQL backend */
+/* #undef SLAPD_SQL */
+
+/* define to support WiredTiger backend */
+/* #undef SLAPD_WT */
+
+/* define to support run-time loadable ACL */
+/* #undef SLAP_DYNACL */
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#define TIME_WITH_SYS_TIME 1
+
+/* Define to 1 if your <sys/time.h> declares `struct tm'. */
+/* #undef TM_IN_SYS_TIME */
+
+/* set to urandom device */
+#define URANDOM_DEVICE "/dev/urandom"
+
+/* define to use OpenSSL BIGNUM for MP */
+/* #undef USE_MP_BIGNUM */
+
+/* define to use GMP for MP */
+/* #undef USE_MP_GMP */
+
+/* define to use 'long' for MP */
+/* #undef USE_MP_LONG */
+
+/* define to use 'long long' for MP */
+/* #undef USE_MP_LONG_LONG */
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+ significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+# define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+/* # undef WORDS_BIGENDIAN */
+# endif
+#endif
+
+/* Define to the type of arg 3 for `accept'. */
+#define ber_socklen_t socklen_t
+
+/* Define to `char *' if <sys/types.h> does not define. */
+/* #undef caddr_t */
+
+/* Define to empty if `const' does not conform to ANSI C. */
+/* #undef const */
+
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef gid_t */
+
+/* Define to `int' if <sys/types.h> does not define. */
+/* #undef mode_t */
+
+/* Define to `long' if <sys/types.h> does not define. */
+/* #undef off_t */
+
+/* Define to `int' if <sys/types.h> does not define. */
+/* #undef pid_t */
+
+/* Define to `int' if <signal.h> does not define. */
+/* #undef sig_atomic_t */
+
+/* Define to `unsigned' if <sys/types.h> does not define. */
+/* #undef size_t */
+
+/* define to snprintf routine */
+/* #undef snprintf */
+
+/* Define like ber_socklen_t if <sys/socket.h> does not define. */
+/* #undef socklen_t */
+
+/* Define to `signed int' if <sys/types.h> does not define. */
+/* #undef ssize_t */
+
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef uid_t */
+
+/* define as empty if volatile is not supported */
+/* #undef volatile */
+
+/* define to snprintf routine */
+/* #undef vsnprintf */
+
+
+/* begin of portable.h.post */
+
+#ifdef _WIN32
+/* don't suck in all of the win32 api */
+# define WIN32_LEAN_AND_MEAN 1
+#endif
+
+#ifndef LDAP_NEEDS_PROTOTYPES
+/* force LDAP_P to always include prototypes */
+#define LDAP_NEEDS_PROTOTYPES 1
+#endif
+
+#ifndef LDAP_REL_ENG
+#if (LDAP_VENDOR_VERSION == 000000) && !defined(LDAP_DEVEL)
+#define LDAP_DEVEL
+#endif
+#if defined(LDAP_DEVEL) && !defined(LDAP_TEST)
+#define LDAP_TEST
+#endif
+#endif
+
+#ifdef HAVE_STDDEF_H
+# include <stddef.h>
+#endif /* HAVE_STDDEF_H */
+
+#ifdef HAVE_EBCDIC
+/* ASCII/EBCDIC converting replacements for stdio funcs
+ * vsnprintf and snprintf are used too, but they are already
+ * checked by the configure script
+ */
+#define fputs ber_pvt_fputs
+#define fgets ber_pvt_fgets
+#define printf ber_pvt_printf
+#define fprintf ber_pvt_fprintf
+#define vfprintf ber_pvt_vfprintf
+#define vsprintf ber_pvt_vsprintf
+#endif
+
+#include "ac/fdset.h"
+
+#include "ldap_cdefs.h"
+#include "ldap_features.h"
+
+#include "ac/assert.h"
+#include "ac/localize.h"
+
+#endif /* _LDAP_PORTABLE_H */
+/* end of portable.h.post */
+
diff --git a/debian/changelog b/debian/changelog
index 23d63b41099..be77dfdefe9 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,5 +1,5 @@
-clickhouse (21.4.1.1) unstable; urgency=low
+clickhouse (21.5.1.1) unstable; urgency=low
* Modified source code
- -- clickhouse-release Sat, 06 Mar 2021 14:43:27 +0300
+ -- clickhouse-release Fri, 02 Apr 2021 18:34:26 +0300
diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile
index d9cd68254b7..2efba9735ae 100644
--- a/docker/client/Dockerfile
+++ b/docker/client/Dockerfile
@@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.4.1.*
+ARG version=21.5.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \
diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile
index d22516eab0a..05ca29f22d4 100644
--- a/docker/server/Dockerfile
+++ b/docker/server/Dockerfile
@@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.4.1.*
+ARG version=21.5.1.*
ARG gosu_ver=1.10
# set non-empty deb_location_url url to create a docker image
diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile
index e727d2a3ecf..976c46ebe27 100644
--- a/docker/test/Dockerfile
+++ b/docker/test/Dockerfile
@@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.4.1.*
+ARG version=21.5.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \
diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile
index 64be52d8e30..2864f7fc4da 100644
--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@@ -1,7 +1,7 @@
# docker build -t yandex/clickhouse-fasttest .
FROM ubuntu:20.04
-ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=10
+ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
@@ -43,20 +43,20 @@ RUN apt-get update \
clang-tidy-${LLVM_VERSION} \
cmake \
curl \
- lsof \
expect \
fakeroot \
- git \
gdb \
+ git \
gperf \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
+ lsof \
moreutils \
ninja-build \
psmisc \
python3 \
- python3-pip \
python3-lxml \
+ python3-pip \
python3-requests \
python3-termcolor \
rename \
diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index c8bfce3848d..c21a115289d 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -8,6 +8,9 @@ trap 'kill $(jobs -pr) ||:' EXIT
# that we can run the "everything else" stage from the cloned source.
stage=${stage:-}
+# Compiler version, normally set by Dockerfile
+export LLVM_VERSION=${LLVM_VERSION:-11}
+
# A variable to pass additional flags to CMake.
# Here we explicitly default it to nothing so that bash doesn't complain about
# it being undefined. Also read it as array so that we can pass an empty list
@@ -124,22 +127,26 @@ continue
function clone_root
{
- git clone https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt"
+ git clone --depth 1 https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt"
(
cd "$FASTTEST_SOURCE"
if [ "$PULL_REQUEST_NUMBER" != "0" ]; then
- if git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/merge"; then
+ if git fetch --depth 1 origin "+refs/pull/$PULL_REQUEST_NUMBER/merge"; then
git checkout FETCH_HEAD
- echo 'Clonned merge head'
+ echo "Checked out pull/$PULL_REQUEST_NUMBER/merge ($(git rev-parse FETCH_HEAD))"
else
- git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/head"
+ git fetch --depth 1 origin "+refs/pull/$PULL_REQUEST_NUMBER/head"
git checkout "$COMMIT_SHA"
- echo 'Checked out to commit'
+ echo "Checked out nominal SHA $COMMIT_SHA for PR $PULL_REQUEST_NUMBER"
fi
else
if [ -v COMMIT_SHA ]; then
+ git fetch --depth 1 origin "$COMMIT_SHA"
git checkout "$COMMIT_SHA"
+ echo "Checked out nominal SHA $COMMIT_SHA for master"
+ else
+ echo "Using default repository head $(git rev-parse HEAD)"
fi
fi
)
@@ -181,7 +188,7 @@ function clone_submodules
)
git submodule sync
- git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
+ git submodule update --depth 1 --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
git submodule foreach git reset --hard
git submodule foreach git checkout @ -f
git submodule foreach git clean -xfd
@@ -215,7 +222,7 @@ function run_cmake
(
cd "$FASTTEST_BUILD"
- cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt"
+ cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER="clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="clang-${LLVM_VERSION}" "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt"
)
}
@@ -223,7 +230,7 @@ function build
{
(
cd "$FASTTEST_BUILD"
- time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
+ time ninja clickhouse-bundle 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then
cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse"
fi
@@ -420,7 +427,7 @@ case "$stage" in
# See the compatibility hacks in `clone_root` stage above. Remove at the same time,
# after Nov 1, 2020.
cd "$FASTTEST_WORKSPACE"
- clone_submodules | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/submodule_log.txt"
+ clone_submodules 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/submodule_log.txt"
;&
"run_cmake")
run_cmake
@@ -431,7 +438,7 @@ case "$stage" in
"configure")
# The `install_log.txt` is also needed for compatibility with old CI task --
# if there is no log, it will decide that build failed.
- configure | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt"
+ configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt"
;&
"run_tests")
run_tests
diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh
index 852c6415d13..20132eafb75 100755
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@@ -74,12 +74,17 @@ function run_tests()
ADDITIONAL_OPTIONS+=('--order=random')
ADDITIONAL_OPTIONS+=('--skip')
ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip')
- ADDITIONAL_OPTIONS+=('--jobs')
- ADDITIONAL_OPTIONS+=('4')
+        # Note that flaky check must be run in parallel, but for now we run
+ # everything in parallel except DatabaseReplicated. See below.
fi
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--replicated-database')
+ else
+ # Too many tests fail for DatabaseReplicated in parallel. All other
+ # configurations are OK.
+ ADDITIONAL_OPTIONS+=('--jobs')
+ ADDITIONAL_OPTIONS+=('8')
fi
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
diff --git a/docs/README.md b/docs/README.md
index 8b3066501bf..a4df023a6ad 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -126,7 +126,13 @@ Contribute all new information in English language. Other languages are translat
### Adding a New File
-When adding a new file:
+When you add a new file, it should end with a link like:
+
+`[Original article](https://clickhouse.tech/docs/) `
+
+and there should be **a new empty line** after it.
+
+{## When adding a new file:
- Make symbolic links for all other languages. You can use the following commands:
@@ -134,7 +140,7 @@ When adding a new file:
$ cd /ClickHouse/clone/directory/docs
$ ln -sr en/new/file.md lang/new/file.md
```
-
+##}
### Adding a New Language
@@ -195,8 +201,11 @@ Templates:
- [Function](_description_templates/template-function.md)
- [Setting](_description_templates/template-setting.md)
+- [Server Setting](_description_templates/template-server-setting.md)
- [Database or Table engine](_description_templates/template-engine.md)
- [System table](_description_templates/template-system-table.md)
+- [Data type](_description_templates/data-type.md)
+- [Statement](_description_templates/statement.md)
diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md
index e0b1be710f1..886e85bbf86 100644
--- a/docs/en/development/build-osx.md
+++ b/docs/en/development/build-osx.md
@@ -5,43 +5,77 @@ toc_title: Build on Mac OS X
# How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
-Build should work on Mac OS X 10.15 (Catalina).
+Build should work on x86_64 (Intel) based macOS 10.15 (Catalina) and higher with recent Xcode's native AppleClang, or Homebrew's vanilla Clang or GCC compilers.
## Install Homebrew {#install-homebrew}
``` bash
-$ /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
+$ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
```
+## Install Xcode and Command Line Tools {#install-xcode-and-command-line-tools}
+
+Install the latest [Xcode](https://apps.apple.com/am/app/xcode/id497799835?mt=12) from App Store.
+
+Open it at least once to accept the end-user license agreement and automatically install the required components.
+
+Then, make sure that the latest Command Line Tools are installed and selected in the system:
+
+``` bash
+$ sudo rm -rf /Library/Developer/CommandLineTools
+$ sudo xcode-select --install
+```
+
+Reboot.
+
## Install Required Compilers, Tools, and Libraries {#install-required-compilers-tools-and-libraries}
``` bash
-$ brew install cmake ninja libtool gettext llvm
+$ brew update
+$ brew install cmake ninja libtool gettext llvm gcc
```
## Checkout ClickHouse Sources {#checkout-clickhouse-sources}
``` bash
-$ git clone --recursive git@github.com:ClickHouse/ClickHouse.git
-```
-
-or
-
-``` bash
-$ git clone --recursive https://github.com/ClickHouse/ClickHouse.git
-
-$ cd ClickHouse
+$ git clone --recursive git@github.com:ClickHouse/ClickHouse.git # or https://github.com/ClickHouse/ClickHouse.git
```
## Build ClickHouse {#build-clickhouse}
-> Please note: ClickHouse doesn't support build with native Apple Clang compiler, we need use clang from LLVM.
+To build using Xcode's native AppleClang compiler:
``` bash
+$ cd ClickHouse
+$ rm -rf build
$ mkdir build
$ cd build
-$ cmake .. -DCMAKE_C_COMPILER=`brew --prefix llvm`/bin/clang -DCMAKE_CXX_COMPILER=`brew --prefix llvm`/bin/clang++ -DCMAKE_PREFIX_PATH=`brew --prefix llvm`
-$ ninja
+$ cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
+$ cmake --build . --config RelWithDebInfo
+$ cd ..
+```
+
+To build using Homebrew's vanilla Clang compiler:
+
+``` bash
+$ cd ClickHouse
+$ rm -rf build
+$ mkdir build
+$ cd build
+$ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
+$ cmake --build . --config RelWithDebInfo
+$ cd ..
+```
+
+To build using Homebrew's vanilla GCC compiler:
+
+``` bash
+$ cd ClickHouse
+$ rm -rf build
+$ mkdir build
+$ cd build
+$ cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-10 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-10 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
+$ cmake --build . --config RelWithDebInfo
$ cd ..
```
diff --git a/docs/en/engines/database-engines/index.md b/docs/en/engines/database-engines/index.md
index 2db11998483..b6892099378 100644
--- a/docs/en/engines/database-engines/index.md
+++ b/docs/en/engines/database-engines/index.md
@@ -18,4 +18,8 @@ You can also use the following database engines:
- [Lazy](../../engines/database-engines/lazy.md)
+- [Atomic](../../engines/database-engines/atomic.md)
+
+- [PostgreSQL](../../engines/database-engines/postgresql.md)
+
[Original article](https://clickhouse.tech/docs/en/database_engines/)
diff --git a/docs/en/engines/database-engines/postgresql.md b/docs/en/engines/database-engines/postgresql.md
new file mode 100644
index 00000000000..1fa86b7ac21
--- /dev/null
+++ b/docs/en/engines/database-engines/postgresql.md
@@ -0,0 +1,138 @@
+---
+toc_priority: 35
+toc_title: PostgreSQL
+---
+
+# PostgreSQL {#postgresql}
+
+Allows connecting to databases on a remote [PostgreSQL](https://www.postgresql.org) server. Supports read and write operations (`SELECT` and `INSERT` queries) to exchange data between ClickHouse and PostgreSQL.
+
+Provides real-time access to the table list and table structure of the remote PostgreSQL server with the help of `SHOW TABLES` and `DESCRIBE TABLE` queries.
+
+Supports table structure modifications (`ALTER TABLE ... ADD|DROP COLUMN`). If the `use_table_cache` parameter (see the Engine Parameters below) is set to `1`, the table structure is cached and not checked for modifications, but can be updated with `DETACH` and `ATTACH` queries.
+
+## Creating a Database {#creating-a-database}
+
+``` sql
+CREATE DATABASE test_database
+ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cache`]);
+```
+
+**Engine Parameters**
+
+- `host:port` — PostgreSQL server address.
+- `database` — Remote database name.
+- `user` — PostgreSQL user.
+- `password` — User password.
+- `use_table_cache` — Defines if the database table structure is cached or not. Optional. Default value: `0`.
+
+## Data Types Support {#data_types-support}
+
+| PostgreSQL       | ClickHouse                                                   |
+|------------------|--------------------------------------------------------------|
+| DATE | [Date](../../sql-reference/data-types/date.md) |
+| TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |
+| REAL | [Float32](../../sql-reference/data-types/float.md) |
+| DOUBLE | [Float64](../../sql-reference/data-types/float.md) |
+| DECIMAL, NUMERIC | [Decimal](../../sql-reference/data-types/decimal.md) |
+| SMALLINT | [Int16](../../sql-reference/data-types/int-uint.md) |
+| INTEGER | [Int32](../../sql-reference/data-types/int-uint.md) |
+| BIGINT | [Int64](../../sql-reference/data-types/int-uint.md) |
+| SERIAL | [UInt32](../../sql-reference/data-types/int-uint.md) |
+| BIGSERIAL | [UInt64](../../sql-reference/data-types/int-uint.md) |
+| TEXT, CHAR | [String](../../sql-reference/data-types/string.md) |
+| INTEGER | Nullable([Int32](../../sql-reference/data-types/int-uint.md))|
+| ARRAY | [Array](../../sql-reference/data-types/array.md) |
+
+
+## Examples of Use {#examples-of-use}
+
+Database in ClickHouse, exchanging data with the PostgreSQL server:
+
+``` sql
+CREATE DATABASE test_database
+ENGINE = PostgreSQL('postgres1:5432', 'test_database', 'postgres', 'mysecretpassword', 1);
+```
+
+``` sql
+SHOW DATABASES;
+```
+
+``` text
+┌─name──────────┐
+│ default │
+│ test_database │
+│ system │
+└───────────────┘
+```
+
+``` sql
+SHOW TABLES FROM test_database;
+```
+
+``` text
+┌─name───────┐
+│ test_table │
+└────────────┘
+```
+
+Reading data from the PostgreSQL table:
+
+``` sql
+SELECT * FROM test_database.test_table;
+```
+
+``` text
+┌─id─┬─value─┐
+│ 1 │ 2 │
+└────┴───────┘
+```
+
+Writing data to the PostgreSQL table:
+
+``` sql
+INSERT INTO test_database.test_table VALUES (3,4);
+SELECT * FROM test_database.test_table;
+```
+
+``` text
+┌─int_id─┬─value─┐
+│ 1 │ 2 │
+│ 3 │ 4 │
+└────────┴───────┘
+```
+
+Consider the table structure was modified in PostgreSQL:
+
+``` sql
+postgre> ALTER TABLE test_table ADD COLUMN data Text
+```
+
+As the `use_table_cache` parameter was set to `1` when the database was created, the table structure in ClickHouse was cached and therefore not modified:
+
+``` sql
+DESCRIBE TABLE test_database.test_table;
+```
+``` text
+┌─name───┬─type──────────────┐
+│ id │ Nullable(Integer) │
+│ value │ Nullable(Integer) │
+└────────┴───────────────────┘
+```
+
+After detaching the table and attaching it again, the structure was updated:
+
+``` sql
+DETACH TABLE test_database.test_table;
+ATTACH TABLE test_database.test_table;
+DESCRIBE TABLE test_database.test_table;
+```
+``` text
+┌─name───┬─type──────────────┐
+│ id │ Nullable(Integer) │
+│ value │ Nullable(Integer) │
+│ data │ Nullable(String) │
+└────────┴───────────────────┘
+```
+
+[Original article](https://clickhouse.tech/docs/en/database-engines/postgresql/)
diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md
index e60cdf3c899..eb4fc583f88 100644
--- a/docs/en/engines/table-engines/index.md
+++ b/docs/en/engines/table-engines/index.md
@@ -47,12 +47,17 @@ Engines for communicating with other data storage and processing systems.
Engines in the family:
-- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka)
-- [MySQL](../../engines/table-engines/integrations/mysql.md#mysql)
-- [ODBC](../../engines/table-engines/integrations/odbc.md#table-engine-odbc)
-- [JDBC](../../engines/table-engines/integrations/jdbc.md#table-engine-jdbc)
-- [HDFS](../../engines/table-engines/integrations/hdfs.md#hdfs)
-- [S3](../../engines/table-engines/integrations/s3.md#table-engine-s3)
+
+- [ODBC](../../engines/table-engines/integrations/odbc.md)
+- [JDBC](../../engines/table-engines/integrations/jdbc.md)
+- [MySQL](../../engines/table-engines/integrations/mysql.md)
+- [MongoDB](../../engines/table-engines/integrations/mongodb.md)
+- [HDFS](../../engines/table-engines/integrations/hdfs.md)
+- [S3](../../engines/table-engines/integrations/s3.md)
+- [Kafka](../../engines/table-engines/integrations/kafka.md)
+- [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md)
+- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md)
+- [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)
### Special Engines {#special-engines}
diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
index e9e069933e5..88c8973eeab 100644
--- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
@@ -1,5 +1,5 @@
---
-toc_priority: 6
+toc_priority: 9
toc_title: EmbeddedRocksDB
---
diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md
index 0782efe8e72..cf4bb5ecbf7 100644
--- a/docs/en/engines/table-engines/integrations/hdfs.md
+++ b/docs/en/engines/table-engines/integrations/hdfs.md
@@ -1,5 +1,5 @@
---
-toc_priority: 4
+toc_priority: 6
toc_title: HDFS
---
diff --git a/docs/en/engines/table-engines/integrations/index.md b/docs/en/engines/table-engines/integrations/index.md
index 28f38375448..eb1c5411e18 100644
--- a/docs/en/engines/table-engines/integrations/index.md
+++ b/docs/en/engines/table-engines/integrations/index.md
@@ -1,6 +1,6 @@
---
toc_folder_title: Integrations
-toc_priority: 30
+toc_priority: 1
---
# Table Engines for Integrations {#table-engines-for-integrations}
@@ -19,5 +19,3 @@ List of supported integrations:
- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)
-
-[Original article](https://clickhouse.tech/docs/en/engines/table-engines/integrations/)
diff --git a/docs/en/engines/table-engines/integrations/jdbc.md b/docs/en/engines/table-engines/integrations/jdbc.md
index edbc5d3ed3e..82efb842ae7 100644
--- a/docs/en/engines/table-engines/integrations/jdbc.md
+++ b/docs/en/engines/table-engines/integrations/jdbc.md
@@ -1,5 +1,5 @@
---
-toc_priority: 2
+toc_priority: 3
toc_title: JDBC
---
diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md
index 0ec50094a27..2eebf5bdb92 100644
--- a/docs/en/engines/table-engines/integrations/kafka.md
+++ b/docs/en/engines/table-engines/integrations/kafka.md
@@ -1,5 +1,5 @@
---
-toc_priority: 5
+toc_priority: 8
toc_title: Kafka
---
diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md
index 2fee27ce80d..a378ab03f55 100644
--- a/docs/en/engines/table-engines/integrations/mongodb.md
+++ b/docs/en/engines/table-engines/integrations/mongodb.md
@@ -1,5 +1,5 @@
---
-toc_priority: 7
+toc_priority: 5
toc_title: MongoDB
---
diff --git a/docs/en/engines/table-engines/integrations/mysql.md b/docs/en/engines/table-engines/integrations/mysql.md
index 8b7caa12c91..3847e7a9e0e 100644
--- a/docs/en/engines/table-engines/integrations/mysql.md
+++ b/docs/en/engines/table-engines/integrations/mysql.md
@@ -1,5 +1,5 @@
---
-toc_priority: 3
+toc_priority: 4
toc_title: MySQL
---
diff --git a/docs/en/engines/table-engines/integrations/odbc.md b/docs/en/engines/table-engines/integrations/odbc.md
index 99efd870088..26bfb6aeb0d 100644
--- a/docs/en/engines/table-engines/integrations/odbc.md
+++ b/docs/en/engines/table-engines/integrations/odbc.md
@@ -1,5 +1,5 @@
---
-toc_priority: 1
+toc_priority: 2
toc_title: ODBC
---
diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md
index 8326038407f..ad5bebb3dea 100644
--- a/docs/en/engines/table-engines/integrations/postgresql.md
+++ b/docs/en/engines/table-engines/integrations/postgresql.md
@@ -1,11 +1,11 @@
---
-toc_priority: 8
+toc_priority: 11
toc_title: PostgreSQL
---
# PostgreSQL {#postgresql}
-The PostgreSQL engine allows you to perform `SELECT` queries on data that is stored on a remote PostgreSQL server.
+The PostgreSQL engine allows you to perform `SELECT` and `INSERT` queries on data that is stored on a remote PostgreSQL server.
## Creating a Table {#creating-a-table}
@@ -15,7 +15,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
-) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password');
+) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, `schema`]);
```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
@@ -29,25 +29,51 @@ The table structure can differ from the original PostgreSQL table structure:
**Engine Parameters**
- `host:port` — PostgreSQL server address.
-
- `database` — Remote database name.
-
- `table` — Remote table name.
-
- `user` — PostgreSQL user.
-
- `password` — User password.
+- `schema` — Non-default table schema. Optional.
-SELECT Queries on PostgreSQL side run as `COPY (SELECT ...) TO STDOUT` inside read-only PostgreSQL transaction with commit after each `SELECT` query.
+## Implementation Details {#implementation-details}
-Simple `WHERE` clauses such as `=, !=, >, >=, <, <=, IN` are executed on the PostgreSQL server.
+`SELECT` queries on PostgreSQL side run as `COPY (SELECT ...) TO STDOUT` inside read-only PostgreSQL transaction with commit after each `SELECT` query.
+
+Simple `WHERE` clauses such as `=`, `!=`, `>`, `>=`, `<`, `<=`, and `IN` are executed on the PostgreSQL server.
All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to PostgreSQL finishes.
-INSERT Queries on PostgreSQL side run as `COPY "table_name" (field1, field2, ... fieldN) FROM STDIN` inside PostgreSQL transaction with auto-commit after each `INSERT` statement.
+`INSERT` queries on PostgreSQL side run as `COPY "table_name" (field1, field2, ... fieldN) FROM STDIN` inside PostgreSQL transaction with auto-commit after each `INSERT` statement.
-PostgreSQL Array types converts into ClickHouse arrays.
-Be careful in PostgreSQL an array data created like a type_name[] may contain multi-dimensional arrays of different dimensions in different table rows in same column, but in ClickHouse it is only allowed to have multidimensional arrays of the same count of dimensions in all table rows in same column.
+PostgreSQL `Array` types are converted into ClickHouse arrays.
+
+!!! info "Note"
+    Be careful: in PostgreSQL, array data created as `type_name[]` may contain multi-dimensional arrays with a different number of dimensions in different rows of the same column. In ClickHouse, multidimensional arrays must have the same number of dimensions in all rows of the same column.
+
+Replica priorities are supported for the PostgreSQL dictionary source. The larger the number in the map, the lower the priority. The highest priority is `0`.
+
+In the example below replica `example01-1` has the highest priority:
+
+```xml
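+<source>
+    <postgresql>
+        <port>5432</port>
+        <user>clickhouse</user>
+        <password>qwerty</password>
+        <replica>
+            <host>example01-1</host>
+            <priority>1</priority>
+        </replica>
+        <replica>
+            <host>example01-2</host>
+            <priority>2</priority>
+        </replica>
+        <db>db_name</db>
+        <table>table_name</table>
+        <where>id=10</where>
+        <invalidate_query>SQL_QUERY</invalidate_query>
+    </postgresql>
+</source>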
+```
## Usage Example {#usage-example}
@@ -64,10 +90,10 @@ PRIMARY KEY (int_id));
CREATE TABLE
-postgres=# insert into test (int_id, str, "float") VALUES (1,'test',2);
+postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
INSERT 0 1
-postgresql> select * from test;
+postgresql> SELECT * FROM test;
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
@@ -87,20 +113,33 @@ ENGINE = PostgreSQL('localhost:5432', 'public', 'test', 'postges_user', 'postgre
```
``` sql
-SELECT * FROM postgresql_table WHERE str IN ('test')
+SELECT * FROM postgresql_table WHERE str IN ('test');
```
``` text
┌─float_nullable─┬─str──┬─int_id─┐
│ ᴺᵁᴸᴸ │ test │ 1 │
└────────────────┴──────┴────────┘
-1 rows in set. Elapsed: 0.019 sec.
```
+Using Non-default Schema:
-## See Also {#see-also}
+```text
+postgres=# CREATE SCHEMA "nice.schema";
-- [The ‘postgresql’ table function](../../../sql-reference/table-functions/postgresql.md)
+postgres=# CREATE TABLE "nice.schema"."nice.table" (a integer);
+
+postgres=# INSERT INTO "nice.schema"."nice.table" SELECT i FROM generate_series(0, 99) as t(i)
+```
+
+```sql
+CREATE TABLE pg_table_schema_with_dots (a UInt32)
+ ENGINE PostgreSQL('localhost:5432', 'clickhouse', 'nice.table', 'postgrsql_user', 'password', 'nice.schema');
+```
+
+**See Also**
+
+- [The `postgresql` table function](../../../sql-reference/table-functions/postgresql.md)
- [Using PostgreSQL as a source of external dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
[Original article](https://clickhouse.tech/docs/en/engines/table-engines/integrations/postgresql/)
diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md
index 476192d3969..5fb9ce5b151 100644
--- a/docs/en/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/en/engines/table-engines/integrations/rabbitmq.md
@@ -1,5 +1,5 @@
---
-toc_priority: 6
+toc_priority: 10
toc_title: RabbitMQ
---
diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md
index 03340f2d8c9..3d02aa13812 100644
--- a/docs/en/engines/table-engines/integrations/s3.md
+++ b/docs/en/engines/table-engines/integrations/s3.md
@@ -1,5 +1,5 @@
---
-toc_priority: 4
+toc_priority: 7
toc_title: S3
---
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 89fcbafe663..0b45488ebf7 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -502,7 +502,15 @@ On hosts with low RAM and swap, you possibly need setting `max_server_memory_usa
## max_concurrent_queries {#max-concurrent-queries}
-The maximum number of simultaneously processed requests.
+The maximum number of simultaneously processed queries related to MergeTree tables. Queries may be limited by other settings: [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
+
+!!! info "Note"
+ These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
+
+Possible values:
+
+- Positive integer.
+- 0 — Disabled.
**Example**
@@ -530,6 +538,21 @@ Default value: `0` that means no limit.
- [max_concurrent_queries](#max-concurrent-queries)
+## min_marks_to_honor_max_concurrent_queries {#min-marks-to-honor-max-concurrent-queries}
+
+The minimal number of marks read by the query for applying the [max_concurrent_queries](#max-concurrent-queries) setting.
+
+Possible values:
+
+- Positive integer.
+- 0 — Disabled.
+
+**Example**
+
+``` xml
+<min_marks_to_honor_max_concurrent_queries>10</min_marks_to_honor_max_concurrent_queries>
+```
+
## max_connections {#max-connections}
The maximum number of inbound connections.
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 4da31b44b57..a5c3902f8f2 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -1914,7 +1914,7 @@ Default value: `0`.
Enables or disables random shard insertion into a [Distributed](../../engines/table-engines/special/distributed.md#distributed) table when there is no distributed key.
-By default, when inserting data into a `Distributed` table with more than one shard, the ClickHouse server will any insertion request if there is no distributed key. When `insert_distributed_one_random_shard = 1`, insertions are allowed and data is forwarded randomly among all shards.
+By default, when inserting data into a `Distributed` table with more than one shard, the ClickHouse server will reject any insertion request if there is no distributed key. When `insert_distributed_one_random_shard = 1`, insertions are allowed and data is forwarded randomly among all shards.
Possible values:
diff --git a/docs/en/operations/system-tables/replication_queue.md b/docs/en/operations/system-tables/replication_queue.md
index d1c74a771c6..f3e3a35f13b 100644
--- a/docs/en/operations/system-tables/replication_queue.md
+++ b/docs/en/operations/system-tables/replication_queue.md
@@ -14,7 +14,17 @@ Columns:
- `node_name` ([String](../../sql-reference/data-types/string.md)) — Node name in ZooKeeper.
-- `type` ([String](../../sql-reference/data-types/string.md)) — Type of the task in the queue: `GET_PARTS`, `MERGE_PARTS`, `DETACH_PARTS`, `DROP_PARTS`, or `MUTATE_PARTS`.
+- `type` ([String](../../sql-reference/data-types/string.md)) — Type of the task in the queue, one of:
+ - `GET_PART` - Get the part from another replica.
+ - `ATTACH_PART` - Attach the part, possibly from our own replica (if found in `detached` folder).
+ You may think of it as a `GET_PART` with some optimisations as they're nearly identical.
+ - `MERGE_PARTS` - Merge the parts.
+ - `DROP_RANGE` - Delete the parts in the specified partition in the specified number range.
+ - `CLEAR_COLUMN` - NOTE: Deprecated. Drop specific column from specified partition.
+ - `CLEAR_INDEX` - NOTE: Deprecated. Drop specific index from specified partition.
+ - `REPLACE_RANGE` - Drop certain range of partitions and replace them by new ones
+ - `MUTATE_PART` - Apply one or several mutations to the part.
+ - `ALTER_METADATA` - Apply alter modification according to global /metadata and /columns paths
- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was submitted for execution.
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
index b7129725820..dc0b6e17198 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
@@ -69,6 +69,8 @@ Types of sources (`source_type`):
- [ClickHouse](#dicts-external_dicts_dict_sources-clickhouse)
- [MongoDB](#dicts-external_dicts_dict_sources-mongodb)
- [Redis](#dicts-external_dicts_dict_sources-redis)
+ - [Cassandra](#dicts-external_dicts_dict_sources-cassandra)
+ - [PostgreSQL](#dicts-external_dicts_dict_sources-postgresql)
## Local File {#dicts-external_dicts_dict_sources-local_file}
diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 5e1d9d4ba23..499376a70d4 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -245,7 +245,7 @@ Elements set to `NULL` are handled as normal values.
Returns the number of elements in the arr array for which func returns something other than 0. If ‘func’ is not specified, it returns the number of non-zero elements in the array.
-Note that the `arrayCount` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
+Note that the `arrayCount` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
## countEqual(arr, x) {#countequalarr-x}
@@ -1229,7 +1229,7 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5,
└────────────────────────────────────┘
```
-Note that the `arrayReverseFilter` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted.
+Note that the `arrayReverseFill` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted.
## arraySplit(func, arr1, …) {#array-split}
@@ -1293,7 +1293,7 @@ Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference
## arrayMin {#array-min}
-Returns the minimum of elements in the source array.
+Returns the minimum of elements in the source array.
If the `func` function is specified, returns the mininum of elements converted by this function.
@@ -1312,9 +1312,9 @@ arrayMin([func,] arr)
**Returned value**
-- The minimum of function values (or the array minimum).
+- The minimum of function values (or the array minimum).
-Type: if `func` is specified, matches `func` return value type, else matches the array elements type.
+Type: if `func` is specified, matches `func` return value type, else matches the array elements type.
**Examples**
@@ -1348,7 +1348,7 @@ Result:
## arrayMax {#array-max}
-Returns the maximum of elements in the source array.
+Returns the maximum of elements in the source array.
If the `func` function is specified, returns the maximum of elements converted by this function.
@@ -1367,9 +1367,9 @@ arrayMax([func,] arr)
**Returned value**
-- The maximum of function values (or the array maximum).
+- The maximum of function values (or the array maximum).
-Type: if `func` is specified, matches `func` return value type, else matches the array elements type.
+Type: if `func` is specified, matches `func` return value type, else matches the array elements type.
**Examples**
@@ -1403,7 +1403,7 @@ Result:
## arraySum {#array-sum}
-Returns the sum of elements in the source array.
+Returns the sum of elements in the source array.
If the `func` function is specified, returns the sum of elements converted by this function.
@@ -1418,7 +1418,7 @@ arraySum([func,] arr)
**Arguments**
- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
-- `arr` — Array. [Array](../../sql-reference/data-types/array.md).
+- `arr` — Array. [Array](../../sql-reference/data-types/array.md).
**Returned value**
@@ -1458,7 +1458,7 @@ Result:
## arrayAvg {#array-avg}
-Returns the average of elements in the source array.
+Returns the average of elements in the source array.
If the `func` function is specified, returns the average of elements converted by this function.
@@ -1473,7 +1473,7 @@ arrayAvg([func,] arr)
**Arguments**
- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
-- `arr` — Array. [Array](../../sql-reference/data-types/array.md).
+- `arr` — Array. [Array](../../sql-reference/data-types/array.md).
**Returned value**
diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md
index 31d09e48e01..e07f28c0f24 100644
--- a/docs/en/sql-reference/functions/bit-functions.md
+++ b/docs/en/sql-reference/functions/bit-functions.md
@@ -250,3 +250,53 @@ Result:
└───────────────┘
```
+## bitHammingDistance {#bithammingdistance}
+
+Returns the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) between the bit representations of two integer values. Can be used with [SimHash](../../sql-reference/functions/hash-functions.md#ngramsimhash) functions for detection of semi-duplicate strings. The smaller the distance, the more likely those strings are the same.
+
+**Syntax**
+
+``` sql
+bitHammingDistance(int1, int2)
+```
+
+**Arguments**
+
+- `int1` — First integer value. [Int64](../../sql-reference/data-types/int-uint.md).
+- `int2` — Second integer value. [Int64](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- The Hamming distance.
+
+Type: [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Examples**
+
+Query:
+
+``` sql
+SELECT bitHammingDistance(111, 121);
+```
+
+Result:
+
+``` text
+┌─bitHammingDistance(111, 121)─┐
+│ 3 │
+└──────────────────────────────┘
+```
+
+With [SimHash](../../sql-reference/functions/hash-functions.md#ngramsimhash):
+
+``` sql
+SELECT bitHammingDistance(ngramSimHash('cat ate rat'), ngramSimHash('rat ate cat'));
+```
+
+Result:
+
+``` text
+┌─bitHammingDistance(ngramSimHash('cat ate rat'), ngramSimHash('rat ate cat'))─┐
+│ 5 │
+└──────────────────────────────────────────────────────────────────────────────┘
+```
diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index 945ede4927f..c60067b06af 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -7,6 +7,8 @@ toc_title: Hash
Hash functions can be used for the deterministic pseudo-random shuffling of elements.
+Simhash is a hash function that returns close hash values for close (similar) arguments.
+
## halfMD5 {#hash-functions-halfmd5}
[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order.
@@ -482,3 +484,938 @@ Result:
- [xxHash](http://cyan4973.github.io/xxHash/).
+## ngramSimHash {#ngramsimhash}
+
+Splits an ASCII string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case sensitive.
+
+Can be used for detection of semi-duplicate strings with [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). The smaller the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same.
+
+**Syntax**
+
+``` sql
+ngramSimHash(string[, ngramsize])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Hash value.
+
+Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT ngramSimHash('ClickHouse') AS Hash;
+```
+
+Result:
+
+``` text
+┌───────Hash─┐
+│ 1627567969 │
+└────────────┘
+```
+
+## ngramSimHashCaseInsensitive {#ngramsimhashcaseinsensitive}
+
+Splits an ASCII string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case insensitive.
+
+Can be used for detection of semi-duplicate strings with [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). The smaller the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same.
+
+**Syntax**
+
+``` sql
+ngramSimHashCaseInsensitive(string[, ngramsize])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Hash value.
+
+Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT ngramSimHashCaseInsensitive('ClickHouse') AS Hash;
+```
+
+Result:
+
+``` text
+┌──────Hash─┐
+│ 562180645 │
+└───────────┘
+```
+
+## ngramSimHashUTF8 {#ngramsimhashutf8}
+
+Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case sensitive.
+
+Can be used for detection of semi-duplicate strings with [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). The smaller the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same.
+
+**Syntax**
+
+``` sql
+ngramSimHashUTF8(string[, ngramsize])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Hash value.
+
+Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT ngramSimHashUTF8('ClickHouse') AS Hash;
+```
+
+Result:
+
+``` text
+┌───────Hash─┐
+│ 1628157797 │
+└────────────┘
+```
+
+## ngramSimHashCaseInsensitiveUTF8 {#ngramsimhashcaseinsensitiveutf8}
+
+Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case insensitive.
+
+Can be used for detection of semi-duplicate strings with [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). The smaller the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same.
+
+**Syntax**
+
+``` sql
+ngramSimHashCaseInsensitiveUTF8(string[, ngramsize])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Hash value.
+
+Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT ngramSimHashCaseInsensitiveUTF8('ClickHouse') AS Hash;
+```
+
+Result:
+
+``` text
+┌───────Hash─┐
+│ 1636742693 │
+└────────────┘
+```
+
+## wordShingleSimHash {#wordshinglesimhash}
+
+Splits an ASCII string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case sensitive.
+
+Can be used for detection of semi-duplicate strings with [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). The smaller the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same.
+
+**Syntax**
+
+``` sql
+wordShingleSimHash(string[, shinglesize])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Hash value.
+
+Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT wordShingleSimHash('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash;
+```
+
+Result:
+
+``` text
+┌───────Hash─┐
+│ 2328277067 │
+└────────────┘
+```
+
+## wordShingleSimHashCaseInsensitive {#wordshinglesimhashcaseinsensitive}
+
+Splits an ASCII string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case insensitive.
+
+Can be used for detection of semi-duplicate strings with [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). The smaller the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same.
+
+**Syntax**
+
+``` sql
+wordShingleSimHashCaseInsensitive(string[, shinglesize])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Hash value.
+
+Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT wordShingleSimHashCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash;
+```
+
+Result:
+
+``` text
+┌───────Hash─┐
+│ 2194812424 │
+└────────────┘
+```
+
+## wordShingleSimHashUTF8 {#wordshinglesimhashutf8}
+
+Splits a UTF-8 string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case sensitive.
+
+Can be used for detection of semi-duplicate strings with [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). The smaller the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same.
+
+**Syntax**
+
+``` sql
+wordShingleSimHashUTF8(string[, shinglesize])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Hash value.
+
+Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT wordShingleSimHashUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash;
+```
+
+Result:
+
+``` text
+┌───────Hash─┐
+│ 2328277067 │
+└────────────┘
+```
+
+## wordShingleSimHashCaseInsensitiveUTF8 {#wordshinglesimhashcaseinsensitiveutf8}
+
+Splits a UTF-8 string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case insensitive.
+
+Can be used for detection of semi-duplicate strings with [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). The smaller the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same.
+
+**Syntax**
+
+``` sql
+wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Hash value.
+
+Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT wordShingleSimHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash;
+```
+
+Result:
+
+``` text
+┌───────Hash─┐
+│ 2194812424 │
+└────────────┘
+```
+
+## ngramMinHash {#ngramminhash}
+
+Splits an ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive.
+
+Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same.
+
+**Syntax**
+
+``` sql
+ngramMinHash(string[, ngramsize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two hashes — the minimum and the maximum.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT ngramMinHash('ClickHouse') AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple──────────────────────────────────────┐
+│ (18333312859352735453,9054248444481805918) │
+└────────────────────────────────────────────┘
+```
+
+## ngramMinHashCaseInsensitive {#ngramminhashcaseinsensitive}
+
+Splits an ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive.
+
+Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same.
+
+**Syntax**
+
+``` sql
+ngramMinHashCaseInsensitive(string[, ngramsize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two hashes — the minimum and the maximum.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT ngramMinHashCaseInsensitive('ClickHouse') AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple──────────────────────────────────────┐
+│ (2106263556442004574,13203602793651726206) │
+└────────────────────────────────────────────┘
+```
+
+## ngramMinHashUTF8 {#ngramminhashutf8}
+
+Splits a UTF-8 string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive.
+
+Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same.
+
+**Syntax**
+
+``` sql
+ngramMinHashUTF8(string[, ngramsize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two hashes — the minimum and the maximum.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT ngramMinHashUTF8('ClickHouse') AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple──────────────────────────────────────┐
+│ (18333312859352735453,6742163577938632877) │
+└────────────────────────────────────────────┘
+```
+
+## ngramMinHashCaseInsensitiveUTF8 {#ngramminhashcaseinsensitiveutf8}
+
+Splits a UTF-8 string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive.
+
+Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same.
+
+**Syntax**
+
+``` sql
+ngramMinHashCaseInsensitiveUTF8(string[, ngramsize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two hashes — the minimum and the maximum.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT ngramMinHashCaseInsensitiveUTF8('ClickHouse') AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple───────────────────────────────────────┐
+│ (12493625717655877135,13203602793651726206) │
+└─────────────────────────────────────────────┘
+```
+
+## ngramMinHashArg {#ngramminhasharg}
+
+Splits an ASCII string into n-grams of `ngramsize` symbols and returns the n-grams with minimum and maximum hashes, calculated by the [ngramMinHash](#ngramminhash) function with the same input. Is case sensitive.
+
+**Syntax**
+
+``` sql
+ngramMinHashArg(string[, ngramsize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two tuples with `hashnum` n-grams each.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT ngramMinHashArg('ClickHouse') AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple─────────────────────────────────────────────────────────────────────────┐
+│ (('ous','ick','lic','Hou','kHo','use'),('Hou','lic','ick','ous','ckH','Cli')) │
+└───────────────────────────────────────────────────────────────────────────────┘
+```
+
+## ngramMinHashArgCaseInsensitive {#ngramminhashargcaseinsensitive}
+
+Splits an ASCII string into n-grams of `ngramsize` symbols and returns the n-grams with minimum and maximum hashes, calculated by the [ngramMinHashCaseInsensitive](#ngramminhashcaseinsensitive) function with the same input. Is case insensitive.
+
+**Syntax**
+
+``` sql
+ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two tuples with `hashnum` n-grams each.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT ngramMinHashArgCaseInsensitive('ClickHouse') AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple─────────────────────────────────────────────────────────────────────────┐
+│ (('ous','ick','lic','kHo','use','Cli'),('kHo','lic','ick','ous','ckH','Hou')) │
+└───────────────────────────────────────────────────────────────────────────────┘
+```
+
+## ngramMinHashArgUTF8 {#ngramminhashargutf8}
+
+Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-grams with minimum and maximum hashes, calculated by the [ngramMinHashUTF8](#ngramminhashutf8) function with the same input. Is case sensitive.
+
+**Syntax**
+
+``` sql
+ngramMinHashArgUTF8(string[, ngramsize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two tuples with `hashnum` n-grams each.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT ngramMinHashArgUTF8('ClickHouse') AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple─────────────────────────────────────────────────────────────────────────┐
+│ (('ous','ick','lic','Hou','kHo','use'),('kHo','Hou','lic','ick','ous','ckH')) │
+└───────────────────────────────────────────────────────────────────────────────┘
+```
+
+## ngramMinHashArgCaseInsensitiveUTF8 {#ngramminhashargcaseinsensitiveutf8}
+
+Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-grams with minimum and maximum hashes, calculated by the [ngramMinHashCaseInsensitiveUTF8](#ngramminhashcaseinsensitiveutf8) function with the same input. Is case insensitive.
+
+**Syntax**
+
+``` sql
+ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two tuples with `hashnum` n-grams each.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT ngramMinHashArgCaseInsensitiveUTF8('ClickHouse') AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple─────────────────────────────────────────────────────────────────────────┐
+│ (('ckH','ous','ick','lic','kHo','use'),('kHo','lic','ick','ous','ckH','Hou')) │
+└───────────────────────────────────────────────────────────────────────────────┘
+```
+
+## wordShingleMinHash {#wordshingleminhash}
+
+Splits an ASCII string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive.
+
+Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same.
+
+**Syntax**
+
+``` sql
+wordShingleMinHash(string[, shinglesize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two hashes — the minimum and the maximum.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT wordShingleMinHash('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple──────────────────────────────────────┐
+│ (16452112859864147620,5844417301642981317) │
+└────────────────────────────────────────────┘
+```
+
+## wordShingleMinHashCaseInsensitive {#wordshingleminhashcaseinsensitive}
+
+Splits an ASCII string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive.
+
+Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same.
+
+**Syntax**
+
+``` sql
+wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two hashes — the minimum and the maximum.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT wordShingleMinHashCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple─────────────────────────────────────┐
+│ (3065874883688416519,1634050779997673240) │
+└───────────────────────────────────────────┘
+```
+
+## wordShingleMinHashUTF8 {#wordshingleminhashutf8}
+
+Splits a UTF-8 string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive.
+
+Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same.
+
+**Syntax**
+
+``` sql
+wordShingleMinHashUTF8(string[, shinglesize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two hashes — the minimum and the maximum.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT wordShingleMinHashUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple──────────────────────────────────────┐
+│ (16452112859864147620,5844417301642981317) │
+└────────────────────────────────────────────┘
+```
+
+## wordShingleMinHashCaseInsensitiveUTF8 {#wordshingleminhashcaseinsensitiveutf8}
+
+Splits a UTF-8 string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive.
+
+Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same.
+
+**Syntax**
+
+``` sql
+wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two hashes — the minimum and the maximum.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT wordShingleMinHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple─────────────────────────────────────┐
+│ (3065874883688416519,1634050779997673240) │
+└───────────────────────────────────────────┘
+```
+
+## wordShingleMinHashArg {#wordshingleminhasharg}
+
+Splits an ASCII string into parts (shingles) of `shinglesize` words each and returns the shingles with minimum and maximum word hashes, calculated by the [wordShingleMinHash](#wordshingleminhash) function with the same input. Is case sensitive.
+
+**Syntax**
+
+``` sql
+wordShingleMinHashArg(string[, shinglesize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two tuples with `hashnum` word shingles each.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT wordShingleMinHashArg('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple─────────────────────────────────────────────────────────────────┐
+│ (('OLAP','database','analytical'),('online','oriented','processing')) │
+└───────────────────────────────────────────────────────────────────────┘
+```
+
+## wordShingleMinHashArgCaseInsensitive {#wordshingleminhashargcaseinsensitive}
+
+Splits an ASCII string into parts (shingles) of `shinglesize` words each and returns the shingles with minimum and maximum word hashes, calculated by the [wordShingleMinHashCaseInsensitive](#wordshingleminhashcaseinsensitive) function with the same input. Is case insensitive.
+
+**Syntax**
+
+``` sql
+wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two tuples with `hashnum` word shingles each.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT wordShingleMinHashArgCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple──────────────────────────────────────────────────────────────────┐
+│ (('queries','database','analytical'),('oriented','processing','DBMS')) │
+└────────────────────────────────────────────────────────────────────────┘
+```
+
+## wordShingleMinHashArgUTF8 {#wordshingleminhashargutf8}
+
+Splits a UTF-8 string into parts (shingles) of `shinglesize` words each and returns the shingles with minimum and maximum word hashes, calculated by the [wordShingleMinHashUTF8](#wordshingleminhashutf8) function with the same input. Is case sensitive.
+
+**Syntax**
+
+``` sql
+wordShingleMinHashArgUTF8(string[, shinglesize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two tuples with `hashnum` word shingles each.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT wordShingleMinHashArgUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple─────────────────────────────────────────────────────────────────┐
+│ (('OLAP','database','analytical'),('online','oriented','processing')) │
+└───────────────────────────────────────────────────────────────────────┘
+```
+
+## wordShingleMinHashArgCaseInsensitiveUTF8 {#wordshingleminhashargcaseinsensitiveutf8}
+
+Splits a UTF-8 string into parts (shingles) of `shinglesize` words each and returns the shingles with minimum and maximum word hashes, calculated by the [wordShingleMinHashCaseInsensitiveUTF8](#wordshingleminhashcaseinsensitiveutf8) function with the same input. Is case insensitive.
+
+**Syntax**
+
+``` sql
+wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum])
+```
+
+**Arguments**
+
+- `string` — String. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Returned value**
+
+- Tuple with two tuples with `hashnum` word shingles each.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT wordShingleMinHashArgCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple;
+```
+
+Result:
+
+``` text
+┌─Tuple──────────────────────────────────────────────────────────────────┐
+│ (('queries','database','analytical'),('oriented','processing','DBMS')) │
+└────────────────────────────────────────────────────────────────────────┘
+```
diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md
index 884e1ef754f..86442835425 100644
--- a/docs/en/sql-reference/functions/tuple-functions.md
+++ b/docs/en/sql-reference/functions/tuple-functions.md
@@ -111,4 +111,55 @@ Result:
- [Tuple](../../sql-reference/data-types/tuple.md)
-[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-functions/)
+## tupleHammingDistance {#tuplehammingdistance}
+
+Returns the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) between two tuples of the same size.
+
+**Syntax**
+
+``` sql
+tupleHammingDistance(tuple1, tuple2)
+```
+
+**Arguments**
+
+- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
+
+The tuples should have elements of the same type.
+
+**Returned value**
+
+- The Hamming distance.
+
+Type: [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Examples**
+
+Query:
+
+``` sql
+SELECT tupleHammingDistance((1, 2, 3), (3, 2, 1)) AS HammingDistance;
+```
+
+Result:
+
+``` text
+┌─HammingDistance─┐
+│ 2 │
+└─────────────────┘
+```
+
+Can be used with [MinHash](../../sql-reference/functions/hash-functions.md#ngramminhash) functions for detection of semi-duplicate strings:
+
+``` sql
+SELECT tupleHammingDistance(wordShingleMinHash(string), wordShingleMinHashCaseInsensitive(string)) as HammingDistance FROM (SELECT 'Clickhouse is a column-oriented database management system for online analytical processing of queries.' AS string);
+```
+
+Result:
+
+``` text
+┌─HammingDistance─┐
+│ 2 │
+└─────────────────┘
+```
diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md
index 42396223b86..f7183ba525c 100644
--- a/docs/en/sql-reference/statements/alter/partition.md
+++ b/docs/en/sql-reference/statements/alter/partition.md
@@ -40,7 +40,7 @@ Read about setting the partition expression in a section [How to specify the par
After the query is executed, you can do whatever you want with the data in the `detached` directory — delete it from the file system, or just leave it.
-This query is replicated – it moves the data to the `detached` directory on all replicas. Note that you can execute this query only on a leader replica. To find out if a replica is a leader, perform the `SELECT` query to the [system.replicas](../../../operations/system-tables/replicas.md#system_tables-replicas) table. Alternatively, it is easier to make a `DETACH` query on all replicas - all the replicas throw an exception, except the leader replica.
+This query is replicated – it moves the data to the `detached` directory on all replicas. Note that you can execute this query only on a leader replica. To find out if a replica is a leader, perform the `SELECT` query to the [system.replicas](../../../operations/system-tables/replicas.md#system_tables-replicas) table. Alternatively, it is easier to make a `DETACH` query on all replicas - all the replicas throw an exception, except the leader replicas (as multiple leaders are allowed).
## DROP PARTITION\|PART {#alter_drop-partition}
@@ -85,9 +85,15 @@ ALTER TABLE visits ATTACH PART 201901_2_2_0;
Read more about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr).
-This query is replicated. The replica-initiator checks whether there is data in the `detached` directory. If data exists, the query checks its integrity. If everything is correct, the query adds the data to the table. All other replicas download the data from the replica-initiator.
+This query is replicated. The replica-initiator checks whether there is data in the `detached` directory.
+If data exists, the query checks its integrity. If everything is correct, the query adds the data to the table.
-So you can put data to the `detached` directory on one replica, and use the `ALTER ... ATTACH` query to add it to the table on all replicas.
+If the non-initiator replica, receiving the attach command, finds the part with the correct checksums in its own
+`detached` folder, it attaches the data without fetching it from other replicas.
+If there is no part with the correct checksums, the data is downloaded from any replica having the part.
+
+You can put data to the `detached` directory on one replica and use the `ALTER ... ATTACH` query to add it to the
+table on all replicas.
## ATTACH PARTITION FROM {#alter_attach-partition-from}
@@ -95,7 +101,8 @@ So you can put data to the `detached` directory on one replica, and use the `ALT
ALTER TABLE table2 ATTACH PARTITION partition_expr FROM table1
```
-This query copies the data partition from the `table1` to `table2` adds data to exsisting in the `table2`. Note that data won’t be deleted from `table1`.
+This query copies the data partition from the `table1` to `table2`.
+Note that data will be deleted neither from `table1` nor from `table2`.
For the query to run successfully, the following conditions must be met:
diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md
index 725024efe0c..2348a2a2668 100644
--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@@ -264,6 +264,10 @@ Wait until a `ReplicatedMergeTree` table will be synced with other replicas in a
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name
```
+After running this statement the `[db.]replicated_merge_tree_family_table_name` fetches commands from
+the common replicated log into its own replication queue, and then the query waits till the replica processes all
+of the fetched commands.
+
### RESTART REPLICA {#query_language-system-restart-replica}
Provides possibility to reinitialize Zookeeper sessions state for `ReplicatedMergeTree` table, will compare current state with Zookeeper as source of true and add tasks to Zookeeper queue if needed
diff --git a/docs/en/sql-reference/table-functions/index.md b/docs/en/sql-reference/table-functions/index.md
index fef30c04c9d..d65a18ab985 100644
--- a/docs/en/sql-reference/table-functions/index.md
+++ b/docs/en/sql-reference/table-functions/index.md
@@ -21,16 +21,18 @@ You can use table functions in:
!!! warning "Warning"
You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled.
-| Function | Description |
-|-----------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------|
-| [file](../../sql-reference/table-functions/file.md) | Creates a File-engine table. |
-| [merge](../../sql-reference/table-functions/merge.md) | Creates a Merge-engine table. |
-| [numbers](../../sql-reference/table-functions/numbers.md) | Creates a table with a single column filled with integer numbers. |
-| [remote](../../sql-reference/table-functions/remote.md) | Allows you to access remote servers without creating a Distributed-engine table. |
-| [url](../../sql-reference/table-functions/url.md) | Creates a URL-engine table. |
-| [mysql](../../sql-reference/table-functions/mysql.md) | Creates a MySQL-engine table. |
-| [postgresql](../../sql-reference/table-functions/postgresql.md) | Creates a PostgreSQL-engine table. |
-| [jdbc](../../sql-reference/table-functions/jdbc.md) | Creates a JDBC-engine table. |
-| [odbc](../../sql-reference/table-functions/odbc.md) | Creates a ODBC-engine table. |
-| [hdfs](../../sql-reference/table-functions/hdfs.md) | Creates a HDFS-engine table. |
-| [s3](../../sql-reference/table-functions/s3.md) | Creates a S3-engine table. |
+| Function | Description |
+|------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------|
+| [file](../../sql-reference/table-functions/file.md) | Creates a [File](../../engines/table-engines/special/file.md)-engine table. |
+| [merge](../../sql-reference/table-functions/merge.md) | Creates a [Merge](../../engines/table-engines/special/merge.md)-engine table. |
+| [numbers](../../sql-reference/table-functions/numbers.md) | Creates a table with a single column filled with integer numbers. |
+| [remote](../../sql-reference/table-functions/remote.md) | Allows you to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md)-engine table. |
+| [url](../../sql-reference/table-functions/url.md) | Creates a [URL](../../engines/table-engines/special/url.md)-engine table. |
+| [mysql](../../sql-reference/table-functions/mysql.md) | Creates a [MySQL](../../engines/table-engines/integrations/mysql.md)-engine table. |
+| [postgresql](../../sql-reference/table-functions/postgresql.md) | Creates a [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)-engine table. |
+| [jdbc](../../sql-reference/table-functions/jdbc.md) | Creates a [JDBC](../../engines/table-engines/integrations/jdbc.md)-engine table. |
+| [odbc](../../sql-reference/table-functions/odbc.md) | Creates an [ODBC](../../engines/table-engines/integrations/odbc.md)-engine table. |
+| [hdfs](../../sql-reference/table-functions/hdfs.md) | Creates an [HDFS](../../engines/table-engines/integrations/hdfs.md)-engine table. |
+| [s3](../../sql-reference/table-functions/s3.md) | Creates an [S3](../../engines/table-engines/integrations/s3.md)-engine table. |
+
+[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/)
diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md
index ad5d8a29904..bfb5fdf9be6 100644
--- a/docs/en/sql-reference/table-functions/postgresql.md
+++ b/docs/en/sql-reference/table-functions/postgresql.md
@@ -10,33 +10,17 @@ Allows `SELECT` and `INSERT` queries to be performed on data that is stored on a
**Syntax**
``` sql
-postgresql('host:port', 'database', 'table', 'user', 'password')
+postgresql('host:port', 'database', 'table', 'user', 'password'[, 'schema'])
```
**Arguments**
- `host:port` — PostgreSQL server address.
-
- `database` — Remote database name.
-
- `table` — Remote table name.
-
- `user` — PostgreSQL user.
-
- `password` — User password.
-
-
-SELECT Queries on PostgreSQL side run as `COPY (SELECT ...) TO STDOUT` inside read-only PostgreSQL transaction with commit after each `SELECT` query.
-
-Simple `WHERE` clauses such as `=, !=, >, >=, <, <=, IN` are executed on the PostgreSQL server.
-
-All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to PostgreSQL finishes.
-
-INSERT Queries on PostgreSQL side run as `COPY "table_name" (field1, field2, ... fieldN) FROM STDIN` inside PostgreSQL transaction with auto-commit after each `INSERT` statement.
-
-PostgreSQL Array types converts into ClickHouse arrays.
-
-Be careful in PostgreSQL an array data type column like Integer[] may contain arrays of different dimensions in different rows, but in ClickHouse it is only allowed to have multidimensional arrays of the same dimension in all rows.
+- `schema` — Non-default table schema. Optional.
**Returned Value**
@@ -45,6 +29,23 @@ A table object with the same columns as the original PostgreSQL table.
!!! info "Note"
In the `INSERT` query to distinguish table function `postgresql(...)` from table name with column names list you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below.
+## Implementation Details {#implementation-details}
+
+`SELECT` queries on PostgreSQL side run as `COPY (SELECT ...) TO STDOUT` inside read-only PostgreSQL transaction with commit after each `SELECT` query.
+
+Simple `WHERE` clauses such as `=`, `!=`, `>`, `>=`, `<`, `<=`, and `IN` are executed on the PostgreSQL server.
+
+All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to PostgreSQL finishes.
+
+`INSERT` queries on PostgreSQL side run as `COPY "table_name" (field1, field2, ... fieldN) FROM STDIN` inside PostgreSQL transaction with auto-commit after each `INSERT` statement.
+
+PostgreSQL Array types are converted into ClickHouse arrays.
+
+!!! info "Note"
+ Be careful, in PostgreSQL an array data type column like Integer[] may contain arrays of different dimensions in different rows, but in ClickHouse it is only allowed to have multidimensional arrays of the same dimension in all rows.
+
+Supports replicas priority for the PostgreSQL dictionary source. The bigger the number in the map, the lower the priority. The highest priority is `0`.
+
**Examples**
Table in PostgreSQL:
@@ -60,10 +61,10 @@ PRIMARY KEY (int_id));
CREATE TABLE
-postgres=# insert into test (int_id, str, "float") VALUES (1,'test',2);
+postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
INSERT 0 1
-postgresql> select * from test;
+postgresql> SELECT * FROM test;
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
@@ -96,9 +97,24 @@ SELECT * FROM postgresql('localhost:5432', 'test', 'test', 'postgresql_user', 'p
└────────┴──────────────┴───────┴──────┴────────────────┘
```
+Using Non-default Schema:
+
+```text
+postgres=# CREATE SCHEMA "nice.schema";
+
+postgres=# CREATE TABLE "nice.schema"."nice.table" (a integer);
+
+postgres=# INSERT INTO "nice.schema"."nice.table" SELECT i FROM generate_series(0, 99) as t(i)
+```
+
+```sql
+CREATE TABLE pg_table_schema_with_dots (a UInt32)
+ ENGINE PostgreSQL('localhost:5432', 'clickhouse', 'nice.table', 'postgresql_user', 'password', 'nice.schema');
+```
+
**See Also**
-- [The ‘PostgreSQL’ table engine](../../engines/table-engines/integrations/postgresql.md)
+- [The PostgreSQL table engine](../../engines/table-engines/integrations/postgresql.md)
- [Using PostgreSQL as a source of external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/postgresql/)
diff --git a/docs/ru/engines/database-engines/index.md b/docs/ru/engines/database-engines/index.md
index ec92edd2888..d4fad8f43a9 100644
--- a/docs/ru/engines/database-engines/index.md
+++ b/docs/ru/engines/database-engines/index.md
@@ -4,7 +4,7 @@ toc_priority: 27
toc_title: "Введение"
---
-# Движки баз данных {#dvizhki-baz-dannykh}
+# Движки баз данных {#database-engines}
Движки баз данных обеспечивают работу с таблицами.
@@ -18,3 +18,5 @@ toc_title: "Введение"
- [Lazy](../../engines/database-engines/lazy.md)
+- [PostgreSQL](../../engines/database-engines/postgresql.md)
+
diff --git a/docs/ru/engines/database-engines/postgresql.md b/docs/ru/engines/database-engines/postgresql.md
new file mode 100644
index 00000000000..c11dab6f1aa
--- /dev/null
+++ b/docs/ru/engines/database-engines/postgresql.md
@@ -0,0 +1,138 @@
+---
+toc_priority: 35
+toc_title: PostgreSQL
+---
+
+# PostgreSQL {#postgresql}
+
+Позволяет подключаться к БД на удаленном сервере [PostgreSQL](https://www.postgresql.org). Поддерживает операции чтения и записи (запросы `SELECT` и `INSERT`) для обмена данными между ClickHouse и PostgreSQL.
+
+Позволяет в реальном времени получать от удаленного сервера PostgreSQL информацию о таблицах БД и их структуре с помощью запросов `SHOW TABLES` и `DESCRIBE TABLE`.
+
+Поддерживает операции изменения структуры таблиц (`ALTER TABLE ... ADD|DROP COLUMN`). Если параметр `use_table_cache` (см. ниже раздел Параметры движка) установлен в значение `1`, структура таблицы кешируется, и изменения в структуре не отслеживаются, но будут обновлены, если выполнить команды `DETACH` и `ATTACH`.
+
+## Создание БД {#creating-a-database}
+
+``` sql
+CREATE DATABASE test_database
+ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, use_table_cache]);
+```
+
+**Параметры движка**
+
+- `host:port` — адрес сервера PostgreSQL.
+- `database` — имя удаленной БД.
+- `user` — пользователь PostgreSQL.
+- `password` — пароль пользователя.
+- `use_table_cache` — определяет кеширование структуры таблиц БД. Необязательный параметр. Значение по умолчанию: `0`.
+
+## Поддерживаемые типы данных {#data_types-support}
+
+| PostgreSQL | ClickHouse |
+|------------------|--------------------------------------------------------------|
+| DATE | [Date](../../sql-reference/data-types/date.md) |
+| TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |
+| REAL | [Float32](../../sql-reference/data-types/float.md) |
+| DOUBLE | [Float64](../../sql-reference/data-types/float.md) |
+| DECIMAL, NUMERIC | [Decimal](../../sql-reference/data-types/decimal.md) |
+| SMALLINT | [Int16](../../sql-reference/data-types/int-uint.md) |
+| INTEGER | [Int32](../../sql-reference/data-types/int-uint.md) |
+| BIGINT | [Int64](../../sql-reference/data-types/int-uint.md) |
+| SERIAL | [UInt32](../../sql-reference/data-types/int-uint.md) |
+| BIGSERIAL | [UInt64](../../sql-reference/data-types/int-uint.md) |
+| TEXT, CHAR | [String](../../sql-reference/data-types/string.md) |
+| INTEGER | Nullable([Int32](../../sql-reference/data-types/int-uint.md))|
+| ARRAY | [Array](../../sql-reference/data-types/array.md) |
+
+
+## Примеры использования {#examples-of-use}
+
+Обмен данными между БД ClickHouse и сервером PostgreSQL:
+
+``` sql
+CREATE DATABASE test_database
+ENGINE = PostgreSQL('postgres1:5432', 'test_database', 'postgres', 'mysecretpassword', 1);
+```
+
+``` sql
+SHOW DATABASES;
+```
+
+``` text
+┌─name──────────┐
+│ default │
+│ test_database │
+│ system │
+└───────────────┘
+```
+
+``` sql
+SHOW TABLES FROM test_database;
+```
+
+``` text
+┌─name───────┐
+│ test_table │
+└────────────┘
+```
+
+Чтение данных из таблицы PostgreSQL:
+
+``` sql
+SELECT * FROM test_database.test_table;
+```
+
+``` text
+┌─id─┬─value─┐
+│ 1 │ 2 │
+└────┴───────┘
+```
+
+Запись данных в таблицу PostgreSQL:
+
+``` sql
+INSERT INTO test_database.test_table VALUES (3,4);
+SELECT * FROM test_database.test_table;
+```
+
+``` text
+┌─id─┬─value─┐
+│ 1 │ 2 │
+│ 3 │ 4 │
+└────┴───────┘
+```
+
+Пусть структура таблицы была изменена в PostgreSQL:
+
+``` sql
+postgre> ALTER TABLE test_table ADD COLUMN data Text
+```
+
+Поскольку при создании БД параметр `use_table_cache` был установлен в значение `1`, структура таблицы в ClickHouse была кеширована и поэтому не изменилась:
+
+``` sql
+DESCRIBE TABLE test_database.test_table;
+```
+``` text
+┌─name───┬─type──────────────┐
+│ id │ Nullable(Integer) │
+│ value │ Nullable(Integer) │
+└────────┴───────────────────┘
+```
+
+После того как таблицу «отцепили» и затем снова «прицепили», структура обновилась:
+
+``` sql
+DETACH TABLE test_database.test_table;
+ATTACH TABLE test_database.test_table;
+DESCRIBE TABLE test_database.test_table;
+```
+``` text
+┌─name───┬─type──────────────┐
+│ id │ Nullable(Integer) │
+│ value │ Nullable(Integer) │
+│ data │ Nullable(String) │
+└────────┴───────────────────┘
+```
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/database-engines/postgresql/)
diff --git a/docs/ru/engines/table-engines/index.md b/docs/ru/engines/table-engines/index.md
index 6c11011a307..a364a3cb972 100644
--- a/docs/ru/engines/table-engines/index.md
+++ b/docs/ru/engines/table-engines/index.md
@@ -16,7 +16,7 @@ toc_title: "Введение"
- Возможно ли многопоточное выполнение запроса.
- Параметры репликации данных.
-## Семейства движков {#semeistva-dvizhkov}
+## Семейства движков {#engine-families}
### MergeTree {#mergetree}
@@ -42,18 +42,23 @@ toc_title: "Введение"
- [StripeLog](log-family/stripelog.md#stripelog)
- [Log](log-family/log.md#log)
-### Движки для интеграции {#dvizhki-dlia-integratsii}
+### Движки для интеграции {#integration-engines}
Движки для связи с другими системами хранения и обработки данных.
Движки семейства:
-- [Kafka](integrations/kafka.md#kafka)
-- [MySQL](integrations/mysql.md#mysql)
-- [ODBC](integrations/odbc.md#table-engine-odbc)
-- [JDBC](integrations/jdbc.md#table-engine-jdbc)
+- [ODBC](../../engines/table-engines/integrations/odbc.md)
+- [JDBC](../../engines/table-engines/integrations/jdbc.md)
+- [MySQL](../../engines/table-engines/integrations/mysql.md)
+- [MongoDB](../../engines/table-engines/integrations/mongodb.md)
+- [HDFS](../../engines/table-engines/integrations/hdfs.md)
+- [Kafka](../../engines/table-engines/integrations/kafka.md)
+- [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md)
+- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md)
+- [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)
-### Специальные движки {#spetsialnye-dvizhki}
+### Специальные движки {#special-engines}
Движки семейства:
diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
index f66e789a392..5a7909f63b2 100644
--- a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
@@ -1,5 +1,5 @@
---
-toc_priority: 6
+toc_priority: 9
toc_title: EmbeddedRocksDB
---
diff --git a/docs/ru/engines/table-engines/integrations/hdfs.md b/docs/ru/engines/table-engines/integrations/hdfs.md
index 3d9cb388a01..b56bbfc0788 100644
--- a/docs/ru/engines/table-engines/integrations/hdfs.md
+++ b/docs/ru/engines/table-engines/integrations/hdfs.md
@@ -1,5 +1,5 @@
---
-toc_priority: 4
+toc_priority: 6
toc_title: HDFS
---
diff --git a/docs/ru/engines/table-engines/integrations/jdbc.md b/docs/ru/engines/table-engines/integrations/jdbc.md
index e2db6fac0b2..fd7411a258e 100644
--- a/docs/ru/engines/table-engines/integrations/jdbc.md
+++ b/docs/ru/engines/table-engines/integrations/jdbc.md
@@ -1,5 +1,5 @@
---
-toc_priority: 2
+toc_priority: 3
toc_title: JDBC
---
diff --git a/docs/ru/engines/table-engines/integrations/kafka.md b/docs/ru/engines/table-engines/integrations/kafka.md
index f053b80aebd..19e2850dd51 100644
--- a/docs/ru/engines/table-engines/integrations/kafka.md
+++ b/docs/ru/engines/table-engines/integrations/kafka.md
@@ -1,5 +1,5 @@
---
-toc_priority: 5
+toc_priority: 8
toc_title: Kafka
---
diff --git a/docs/ru/engines/table-engines/integrations/mongodb.md b/docs/ru/engines/table-engines/integrations/mongodb.md
index 5ab63494648..97f903bdf89 100644
--- a/docs/ru/engines/table-engines/integrations/mongodb.md
+++ b/docs/ru/engines/table-engines/integrations/mongodb.md
@@ -1,5 +1,5 @@
---
-toc_priority: 7
+toc_priority: 5
toc_title: MongoDB
---
diff --git a/docs/ru/engines/table-engines/integrations/mysql.md b/docs/ru/engines/table-engines/integrations/mysql.md
index 9152a57d122..5011c8a93c6 100644
--- a/docs/ru/engines/table-engines/integrations/mysql.md
+++ b/docs/ru/engines/table-engines/integrations/mysql.md
@@ -1,5 +1,5 @@
---
-toc_priority: 3
+toc_priority: 4
toc_title: MySQL
---
diff --git a/docs/ru/engines/table-engines/integrations/odbc.md b/docs/ru/engines/table-engines/integrations/odbc.md
index b2faa9b1e9e..669977ff531 100644
--- a/docs/ru/engines/table-engines/integrations/odbc.md
+++ b/docs/ru/engines/table-engines/integrations/odbc.md
@@ -1,5 +1,5 @@
---
-toc_priority: 1
+toc_priority: 2
toc_title: ODBC
---
diff --git a/docs/ru/engines/table-engines/integrations/postgresql.md b/docs/ru/engines/table-engines/integrations/postgresql.md
index ecf431830f8..8964b1dbf02 100644
--- a/docs/ru/engines/table-engines/integrations/postgresql.md
+++ b/docs/ru/engines/table-engines/integrations/postgresql.md
@@ -1,11 +1,11 @@
---
-toc_priority: 8
+toc_priority: 11
toc_title: PostgreSQL
---
-# PosgtreSQL {#postgresql}
+# PostgreSQL {#postgresql}
-Движок PostgreSQL позволяет выполнять запросы `SELECT` над данными, хранящимися на удалённом PostgreSQL сервере.
+Движок PostgreSQL позволяет выполнять запросы `SELECT` и `INSERT` для таблиц на удаленном сервере PostgreSQL.
## Создание таблицы {#creating-a-table}
@@ -15,7 +15,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
-) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password');
+) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, 'schema']);
```
Смотрите подробное описание запроса [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query).
@@ -29,25 +29,51 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
**Параметры движка**
- `host:port` — адрес сервера PostgreSQL.
-
- `database` — Имя базы данных на сервере PostgreSQL.
-
- `table` — Имя таблицы.
-
- `user` — Имя пользователя PostgreSQL.
-
- `password` — Пароль пользователя PostgreSQL.
+- `schema` — имя схемы, если не используется схема по умолчанию. Необязательный аргумент.
-SELECT запросы на стороне PostgreSQL выполняются как `COPY (SELECT ...) TO STDOUT` внутри транзакции PostgreSQL только на чтение с коммитом после каждого `SELECT` запроса.
+## Особенности реализации {#implementation-details}
-Простые условия для `WHERE` такие как `=, !=, >, >=, <, <=, IN` исполняются на стороне PostgreSQL сервера.
+Запросы `SELECT` на стороне PostgreSQL выполняются как `COPY (SELECT ...) TO STDOUT` внутри транзакции PostgreSQL только на чтение с коммитом после каждого запроса `SELECT`.
-Все операции объединения, аггрегации, сортировки, условия `IN [ array ]` и ограничения `LIMIT` выполняются на стороне ClickHouse только после того как запрос к PostgreSQL закончился.
+Простые условия для `WHERE`, такие как `=`, `!=`, `>`, `>=`, `<`, `<=` и `IN`, исполняются на стороне PostgreSQL сервера.
-INSERT запросы на стороне PostgreSQL выполняются как `COPY "table_name" (field1, field2, ... fieldN) FROM STDIN` внутри PostgreSQL транзакции с автоматическим коммитом после каждого `INSERT` запроса.
+Все операции объединения, агрегации, сортировки, условия `IN [ array ]` и ограничения `LIMIT` выполняются на стороне ClickHouse только после того, как запрос к PostgreSQL закончился.
+
+Запросы `INSERT` на стороне PostgreSQL выполняются как `COPY "table_name" (field1, field2, ... fieldN) FROM STDIN` внутри PostgreSQL транзакции с автоматическим коммитом после каждого запроса `INSERT`.
PostgreSQL массивы конвертируются в массивы ClickHouse.
-Будьте осторожны в PostgreSQL массивы созданные как type_name[], являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы, внутри ClickHouse допустипы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы.
+
+!!! info "Внимание"
+    Будьте внимательны: в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. Внутри ClickHouse допустимы только многомерные массивы с одинаковым количеством измерений во всех строках таблицы.
+
+При использовании словаря PostgreSQL поддерживается приоритет реплик. Чем больше номер реплики, тем ниже ее приоритет. Наивысший приоритет у реплики с номером `0`.
+
+В примере ниже реплика `example01-1` имеет более высокий приоритет:
+
+```xml
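+<source>
+    <postgresql>
+        <port>5432</port>
+        <user>clickhouse</user>
+        <password>qwerty</password>
+        <replica>
+            <host>example01-1</host>
+            <priority>1</priority>
+        </replica>
+        <replica>
+            <host>example01-2</host>
+            <priority>2</priority>
+        </replica>
+        <db>db_name</db>
+        <table>table_name</table>
+        <where>id=10</where>
+        <invalidate_query>SQL_QUERY</invalidate_query>
+    </postgresql>
+</source>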
+```
## Пример использования {#usage-example}
@@ -64,17 +90,17 @@ PRIMARY KEY (int_id));
CREATE TABLE
-postgres=# insert into test (int_id, str, "float") VALUES (1,'test',2);
+postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
INSERT 0 1
-postgresql> select * from test;
+postgresql> SELECT * FROM test;
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
(1 row)
```
-Таблица в ClickHouse, получение данных из PostgreSQL таблицы созданной выше:
+Таблица в ClickHouse, получение данных из PostgreSQL таблицы, созданной выше:
``` sql
CREATE TABLE default.postgresql_table
@@ -87,19 +113,33 @@ ENGINE = PostgreSQL('localhost:5432', 'public', 'test', 'postges_user', 'postgre
```
``` sql
-SELECT * FROM postgresql_table WHERE str IN ('test')
+SELECT * FROM postgresql_table WHERE str IN ('test');
```
``` text
┌─float_nullable─┬─str──┬─int_id─┐
│ ᴺᵁᴸᴸ │ test │ 1 │
└────────────────┴──────┴────────┘
-1 rows in set. Elapsed: 0.019 sec.
```
+Использование схемы, отличной от схемы по умолчанию:
-## Смотри также {#see-also}
+```text
+postgres=# CREATE SCHEMA "nice.schema";
-- [Табличная функция ‘postgresql’](../../../sql-reference/table-functions/postgresql.md)
-- [Использование PostgreSQL в качестве истояника для внешнего словаря](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
+postgres=# CREATE TABLE "nice.schema"."nice.table" (a integer);
+postgres=# INSERT INTO "nice.schema"."nice.table" SELECT i FROM generate_series(0, 99) as t(i);
+```
+
+```sql
+CREATE TABLE pg_table_schema_with_dots (a UInt32)
+ ENGINE PostgreSQL('localhost:5432', 'clickhouse', 'nice.table', 'postgresql_user', 'password', 'nice.schema');
+```
+
+**См. также**
+
+- [Табличная функция `postgresql`](../../../sql-reference/table-functions/postgresql.md)
+- [Использование PostgreSQL в качестве источника для внешнего словаря](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/postgresql/)
diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md
index b50347f6196..109146d27f4 100644
--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@@ -481,7 +481,15 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
## max_concurrent_queries {#max-concurrent-queries}
-Максимальное количество одновременно обрабатываемых запросов.
+Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`. Запросы также могут быть ограничены настройками: [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
+
+!!! info "Примечание"
+ Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений.
+
+Возможные значения:
+
+- Положительное целое число.
+- 0 — выключена.
**Пример**
@@ -509,6 +517,21 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
- [max_concurrent_queries](#max-concurrent-queries)
+## min_marks_to_honor_max_concurrent_queries {#min-marks-to-honor-max-concurrent-queries}
+
+Определяет минимальное количество засечек, считываемых запросом для применения настройки [max_concurrent_queries](#max-concurrent-queries).
+
+Возможные значения:
+
+- Положительное целое число.
+- 0 — выключена.
+
+**Пример**
+
+``` xml
+<min_marks_to_honor_max_concurrent_queries>10</min_marks_to_honor_max_concurrent_queries>
+```
+
## max_connections {#max-connections}
Максимальное количество входящих соединений.
@@ -1159,4 +1182,3 @@ ClickHouse использует ZooKeeper для хранения метадан
```
-
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index f95dc6657b2..d10ac2ab317 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -1792,6 +1792,19 @@ ClickHouse генерирует исключение
- [Движок Distributed](../../engines/table-engines/special/distributed.md#distributed)
- [Управление распределёнными таблицами](../../sql-reference/statements/system.md#query-language-system-distributed)
+## insert_distributed_one_random_shard {#insert_distributed_one_random_shard}
+
+Включает или отключает режим вставки данных в [Distributed](../../engines/table-engines/special/distributed.md#distributed) таблицу в случайный шард при отсутствии ключа шардирования.
+
+По умолчанию при вставке данных в `Distributed` таблицу с несколькими шардами и при отсутствии ключа шардирования сервер ClickHouse будет отклонять любой запрос на вставку данных. Когда `insert_distributed_one_random_shard = 1`, вставки принимаются, а данные записываются в случайный шард.
+
+Возможные значения:
+
+- 0 — если у таблицы несколько шардов, но ключ шардирования отсутствует, вставка данных отклоняется.
+- 1 — если ключ шардирования отсутствует, то вставка данных осуществляется в случайный шард среди всех доступных шардов.
+
+Значение по умолчанию: `0`.
+
## insert_shard_id {#insert_shard_id}
Если не `0`, указывает, в какой шард [Distributed](../../engines/table-engines/special/distributed.md#distributed) таблицы данные будут вставлены синхронно.
diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
index e3816e78547..a7999470330 100644
--- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
+++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
@@ -69,6 +69,7 @@ SETTINGS(format_csv_allow_single_quotes = 0)
- [ClickHouse](#dicts-external_dicts_dict_sources-clickhouse)
- [MongoDB](#dicts-external_dicts_dict_sources-mongodb)
- [Redis](#dicts-external_dicts_dict_sources-redis)
+ - [PostgreSQL](#dicts-external_dicts_dict_sources-postgresql)
## Локальный файл {#dicts-external_dicts_dict_sources-local_file}
diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md
index 4538941a4a4..560795506a0 100644
--- a/docs/ru/sql-reference/functions/array-functions.md
+++ b/docs/ru/sql-reference/functions/array-functions.md
@@ -1111,6 +1111,78 @@ SELECT
Функция `arrayFilter` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен.
+## arrayFill(func, arr1, …) {#array-fill}
+
+Перебирает `arr1` от первого элемента к последнему и заменяет `arr1[i]` на `arr1[i - 1]`, если `func` вернула 0. Первый элемент `arr1` остаётся неизменным.
+
+Примеры:
+
+``` sql
+SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]) AS res
+```
+
+``` text
+┌─res──────────────────────────────┐
+│ [1,1,3,11,12,12,12,5,6,14,14,14] │
+└──────────────────────────────────┘
+```
+
+Функция `arrayFill` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен.
+
+## arrayReverseFill(func, arr1, …) {#array-reverse-fill}
+
+Перебирает `arr1` от последнего элемента к первому и заменяет `arr1[i]` на `arr1[i + 1]`, если `func` вернула 0. Последний элемент `arr1` остаётся неизменным.
+
+Примеры:
+
+``` sql
+SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]) AS res
+```
+
+``` text
+┌─res────────────────────────────────┐
+│ [1,3,3,11,12,5,5,5,6,14,NULL,NULL] │
+└────────────────────────────────────┘
+```
+
+Функция `arrayReverseFill` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен.
+
+## arraySplit(func, arr1, …) {#array-split}
+
+Разделяет массив `arr1` на несколько. Если `func` возвращает не 0, то массив разделяется, а элемент помещается в левую часть. Массив не разбивается по первому элементу.
+
+Примеры:
+
+``` sql
+SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res
+```
+
+``` text
+┌─res─────────────┐
+│ [[1,2,3],[4,5]] │
+└─────────────────┘
+```
+
+Функция `arraySplit` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен.
+
+## arrayReverseSplit(func, arr1, …) {#array-reverse-split}
+
+Разделяет массив `arr1` на несколько. Если `func` возвращает не 0, то массив разделяется, а элемент помещается в правую часть. Массив не разбивается по последнему элементу.
+
+Примеры:
+
+``` sql
+SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res
+```
+
+``` text
+┌─res───────────────┐
+│ [[1],[2,3,4],[5]] │
+└───────────────────┘
+```
+
+Функция `arrayReverseSplit` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен.
+
## arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1}
Возвращает 1, если существует хотя бы один элемент массива `arr`, для которого функция func возвращает не 0. Иначе возвращает 0.
@@ -1137,7 +1209,7 @@ SELECT
## arrayMin {#array-min}
-Возвращает значение минимального элемента в исходном массиве.
+Возвращает значение минимального элемента в исходном массиве.
Если передана функция `func`, возвращается минимум из элементов массива, преобразованных этой функцией.
@@ -1192,7 +1264,7 @@ SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res;
## arrayMax {#array-max}
-Возвращает значение максимального элемента в исходном массиве.
+Возвращает значение максимального элемента в исходном массиве.
Если передана функция `func`, возвращается максимум из элементов массива, преобразованных этой функцией.
@@ -1247,7 +1319,7 @@ SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res;
## arraySum {#array-sum}
-Возвращает сумму элементов в исходном массиве.
+Возвращает сумму элементов в исходном массиве.
Если передана функция `func`, возвращается сумма элементов массива, преобразованных этой функцией.
@@ -1262,7 +1334,7 @@ arraySum([func,] arr)
**Аргументы**
- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
-- `arr` — массив. [Array](../../sql-reference/data-types/array.md).
+- `arr` — массив. [Array](../../sql-reference/data-types/array.md).
**Возвращаемое значение**
@@ -1302,7 +1374,7 @@ SELECT arraySum(x -> x*x, [2, 3]) AS res;
## arrayAvg {#array-avg}
-Возвращает среднее значение элементов в исходном массиве.
+Возвращает среднее значение элементов в исходном массиве.
Если передана функция `func`, возвращается среднее значение элементов массива, преобразованных этой функцией.
@@ -1317,7 +1389,7 @@ arrayAvg([func,] arr)
**Аргументы**
- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
-- `arr` — массив. [Array](../../sql-reference/data-types/array.md).
+- `arr` — массив. [Array](../../sql-reference/data-types/array.md).
**Возвращаемое значение**
@@ -1355,7 +1427,7 @@ SELECT arrayAvg(x -> (x * x), [2, 4]) AS res;
└─────┘
```
-**Синтаксис**
+**Синтаксис**
``` sql
arraySum(arr)
@@ -1367,7 +1439,7 @@ arraySum(arr)
Тип: [Int](../../sql-reference/data-types/int-uint.md) или [Float](../../sql-reference/data-types/float.md).
-**Аргументы**
+**Аргументы**
- `arr` — [массив](../../sql-reference/data-types/array.md).
diff --git a/docs/ru/sql-reference/functions/bit-functions.md b/docs/ru/sql-reference/functions/bit-functions.md
index 09844685a6c..a5124e67235 100644
--- a/docs/ru/sql-reference/functions/bit-functions.md
+++ b/docs/ru/sql-reference/functions/bit-functions.md
@@ -240,3 +240,53 @@ SELECT bitCount(333);
└───────────────┘
```
+## bitHammingDistance {#bithammingdistance}
+
+Возвращает [расстояние Хэмминга](https://ru.wikipedia.org/wiki/%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%A5%D1%8D%D0%BC%D0%BC%D0%B8%D0%BD%D0%B3%D0%B0) между битовыми представлениями двух целых чисел. Может быть использовано с функциями [SimHash](../../sql-reference/functions/hash-functions.md#ngramsimhash) для проверки двух строк на схожесть. Чем меньше расстояние, тем больше вероятность, что строки совпадают.
+
+**Синтаксис**
+
+``` sql
+bitHammingDistance(int1, int2)
+```
+
+**Аргументы**
+
+- `int1` — первое целое число. [Int64](../../sql-reference/data-types/int-uint.md).
+- `int2` — второе целое число. [Int64](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Расстояние Хэмминга.
+
+Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Примеры**
+
+Запрос:
+
+``` sql
+SELECT bitHammingDistance(111, 121);
+```
+
+Результат:
+
+``` text
+┌─bitHammingDistance(111, 121)─┐
+│ 3 │
+└──────────────────────────────┘
+```
+
+Используя [SimHash](../../sql-reference/functions/hash-functions.md#ngramsimhash):
+
+``` sql
+SELECT bitHammingDistance(ngramSimHash('cat ate rat'), ngramSimHash('rat ate cat'));
+```
+
+Результат:
+
+``` text
+┌─bitHammingDistance(ngramSimHash('cat ate rat'), ngramSimHash('rat ate cat'))─┐
+│ 5 │
+└──────────────────────────────────────────────────────────────────────────────┘
+```
diff --git a/docs/ru/sql-reference/functions/hash-functions.md b/docs/ru/sql-reference/functions/hash-functions.md
index 6797f530346..2efff9c3727 100644
--- a/docs/ru/sql-reference/functions/hash-functions.md
+++ b/docs/ru/sql-reference/functions/hash-functions.md
@@ -7,6 +7,8 @@ toc_title: "Функции хэширования"
Функции хэширования могут использоваться для детерминированного псевдослучайного разбрасывания элементов.
+Simhash – это хеш-функция, которая для близких значений возвращает близкий хеш.
+
## halfMD5 {#hash-functions-halfmd5}
[Интерпретирует](../../sql-reference/functions/hash-functions.md#type_conversion_functions-reinterpretAsString) все входные параметры как строки и вычисляет хэш [MD5](https://ru.wikipedia.org/wiki/MD5) для каждой из них. Затем объединяет хэши, берет первые 8 байт хэша результирующей строки и интерпретирует их как значение типа `UInt64` с big-endian порядком байтов.
@@ -484,3 +486,937 @@ SELECT xxHash32('Hello, world!');
- [xxHash](http://cyan4973.github.io/xxHash/).
+## ngramSimHash {#ngramsimhash}
+
+Выделяет из ASCII строки отрезки (n-граммы) размером `ngramsize` символов и возвращает n-граммовый `simhash`. Функция регистрозависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). Чем меньше [расстояние Хэмминга](https://ru.wikipedia.org/wiki/%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%A5%D1%8D%D0%BC%D0%BC%D0%B8%D0%BD%D0%B3%D0%B0) между результатом вычисления `simhash` двух строк, тем больше вероятность, что строки совпадают.
+
+**Синтаксис**
+
+``` sql
+ngramSimHash(string[, ngramsize])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — размер n-грамм. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Значение хеш-функции от строки.
+
+Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT ngramSimHash('ClickHouse') AS Hash;
+```
+
+Результат:
+
+``` text
+┌───────Hash─┐
+│ 1627567969 │
+└────────────┘
+```
+
+## ngramSimHashCaseInsensitive {#ngramsimhashcaseinsensitive}
+
+Выделяет из ASCII строки отрезки (n-граммы) размером `ngramsize` символов и возвращает n-граммовый `simhash`. Функция регистро**не**зависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). Чем меньше [расстояние Хэмминга](https://ru.wikipedia.org/wiki/%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%A5%D1%8D%D0%BC%D0%BC%D0%B8%D0%BD%D0%B3%D0%B0) между результатом вычисления `simhash` двух строк, тем больше вероятность, что строки совпадают.
+
+**Синтаксис**
+
+``` sql
+ngramSimHashCaseInsensitive(string[, ngramsize])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — размер n-грамм. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Значение хеш-функции от строки.
+
+Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT ngramSimHashCaseInsensitive('ClickHouse') AS Hash;
+```
+
+Результат:
+
+``` text
+┌──────Hash─┐
+│ 562180645 │
+└───────────┘
+```
+
+## ngramSimHashUTF8 {#ngramsimhashutf8}
+
+Выделяет из UTF-8 строки отрезки (n-граммы) размером `ngramsize` символов и возвращает n-граммовый `simhash`. Функция регистрозависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). Чем меньше [расстояние Хэмминга](https://ru.wikipedia.org/wiki/%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%A5%D1%8D%D0%BC%D0%BC%D0%B8%D0%BD%D0%B3%D0%B0) между результатом вычисления `simhash` двух строк, тем больше вероятность, что строки совпадают.
+
+**Синтаксис**
+
+``` sql
+ngramSimHashUTF8(string[, ngramsize])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — размер n-грамм. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Значение хеш-функции от строки.
+
+Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT ngramSimHashUTF8('ClickHouse') AS Hash;
+```
+
+Результат:
+
+``` text
+┌───────Hash─┐
+│ 1628157797 │
+└────────────┘
+```
+
+## ngramSimHashCaseInsensitiveUTF8 {#ngramsimhashcaseinsensitiveutf8}
+
+Выделяет из UTF-8 строки отрезки (n-граммы) размером `ngramsize` символов и возвращает n-граммовый `simhash`. Функция регистро**не**зависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). Чем меньше [расстояние Хэмминга](https://ru.wikipedia.org/wiki/%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%A5%D1%8D%D0%BC%D0%BC%D0%B8%D0%BD%D0%B3%D0%B0) между результатом вычисления `simhash` двух строк, тем больше вероятность, что строки совпадают.
+
+**Синтаксис**
+
+``` sql
+ngramSimHashCaseInsensitiveUTF8(string[, ngramsize])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — размер n-грамм. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Значение хеш-функции от строки.
+
+Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT ngramSimHashCaseInsensitiveUTF8('ClickHouse') AS Hash;
+```
+
+Результат:
+
+``` text
+┌───────Hash─┐
+│ 1636742693 │
+└────────────┘
+```
+
+## wordShingleSimHash {#wordshinglesimhash}
+
+Выделяет из ASCII строки отрезки (шинглы) из `shinglesize` слов и возвращает шингловый `simhash`. Функция регистрозависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). Чем меньше [расстояние Хэмминга](https://ru.wikipedia.org/wiki/%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%A5%D1%8D%D0%BC%D0%BC%D0%B8%D0%BD%D0%B3%D0%B0) между результатом вычисления `simhash` двух строк, тем больше вероятность, что строки совпадают.
+
+**Синтаксис**
+
+``` sql
+wordShingleSimHash(string[, shinglesize])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — размер словесных шинглов. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Значение хеш-функции от строки.
+
+Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT wordShingleSimHash('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash;
+```
+
+Результат:
+
+``` text
+┌───────Hash─┐
+│ 2328277067 │
+└────────────┘
+```
+
+## wordShingleSimHashCaseInsensitive {#wordshinglesimhashcaseinsensitive}
+
+Выделяет из ASCII строки отрезки (шинглы) из `shinglesize` слов и возвращает шингловый `simhash`. Функция регистро**не**зависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). Чем меньше [расстояние Хэмминга](https://ru.wikipedia.org/wiki/%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%A5%D1%8D%D0%BC%D0%BC%D0%B8%D0%BD%D0%B3%D0%B0) между результатом вычисления `simhash` двух строк, тем больше вероятность, что строки совпадают.
+
+**Синтаксис**
+
+``` sql
+wordShingleSimHashCaseInsensitive(string[, shinglesize])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — размер словесных шинглов. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Значение хеш-функции от строки.
+
+Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT wordShingleSimHashCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash;
+```
+
+Результат:
+
+``` text
+┌───────Hash─┐
+│ 2194812424 │
+└────────────┘
+```
+
+## wordShingleSimHashUTF8 {#wordshinglesimhashutf8}
+
+Выделяет из UTF-8 строки отрезки (шинглы) из `shinglesize` слов и возвращает шингловый `simhash`. Функция регистрозависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). Чем меньше [расстояние Хэмминга](https://ru.wikipedia.org/wiki/%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%A5%D1%8D%D0%BC%D0%BC%D0%B8%D0%BD%D0%B3%D0%B0) между результатом вычисления `simhash` двух строк, тем больше вероятность, что строки совпадают.
+
+**Синтаксис**
+
+``` sql
+wordShingleSimHashUTF8(string[, shinglesize])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — размер словесных шинглов. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Значение хеш-функции от строки.
+
+Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT wordShingleSimHashUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash;
+```
+
+Результат:
+
+``` text
+┌───────Hash─┐
+│ 2328277067 │
+└────────────┘
+```
+
+## wordShingleSimHashCaseInsensitiveUTF8 {#wordshinglesimhashcaseinsensitiveutf8}
+
+Выделяет из UTF-8 строки отрезки (шинглы) из `shinglesize` слов и возвращает шингловый `simhash`. Функция регистро**не**зависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [bitHammingDistance](../../sql-reference/functions/bit-functions.md#bithammingdistance). Чем меньше [расстояние Хэмминга](https://ru.wikipedia.org/wiki/%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%A5%D1%8D%D0%BC%D0%BC%D0%B8%D0%BD%D0%B3%D0%B0) между результатом вычисления `simhash` двух строк, тем больше вероятность, что строки совпадают.
+
+**Синтаксис**
+
+``` sql
+wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — размер словесных шинглов. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Значение хеш-функции от строки.
+
+Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT wordShingleSimHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Hash;
+```
+
+Результат:
+
+``` text
+┌───────Hash─┐
+│ 2194812424 │
+└────────────┘
+```
+
+## ngramMinHash {#ngramminhash}
+
+Выделяет из ASCII строки отрезки (n-граммы) размером `ngramsize` символов и вычисляет хеш для каждой n-граммы. Использует `hashnum` минимальных хешей, чтобы вычислить минимальный хеш, и `hashnum` максимальных хешей, чтобы вычислить максимальный хеш. Возвращает кортеж из этих хешей. Функция регистрозависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). Если для двух строк минимальные или максимальные хеши одинаковы, мы считаем, что эти строки совпадают.
+
+**Синтаксис**
+
+``` sql
+ngramMinHash(string[, ngramsize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — размер n-грамм. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж с двумя хешами — минимальным и максимальным.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT ngramMinHash('ClickHouse') AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple──────────────────────────────────────┐
+│ (18333312859352735453,9054248444481805918) │
+└────────────────────────────────────────────┘
+```
+
+## ngramMinHashCaseInsensitive {#ngramminhashcaseinsensitive}
+
+Выделяет из ASCII строки отрезки (n-граммы) размером `ngramsize` символов и вычисляет хеш для каждой n-граммы. Использует `hashnum` минимальных хешей, чтобы вычислить минимальный хеш, и `hashnum` максимальных хешей, чтобы вычислить максимальный хеш. Возвращает кортеж из этих хешей. Функция регистро**не**зависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). Если для двух строк минимальные или максимальные хеши одинаковы, мы считаем, что эти строки совпадают.
+
+**Синтаксис**
+
+``` sql
+ngramMinHashCaseInsensitive(string[, ngramsize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — размер n-грамм. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж с двумя хешами — минимальным и максимальным.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT ngramMinHashCaseInsensitive('ClickHouse') AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple──────────────────────────────────────┐
+│ (2106263556442004574,13203602793651726206) │
+└────────────────────────────────────────────┘
+```
+
+## ngramMinHashUTF8 {#ngramminhashutf8}
+
+Выделяет из UTF-8 строки отрезки (n-граммы) размером `ngramsize` символов и вычисляет хеш для каждой n-граммы. Использует `hashnum` минимальных хешей, чтобы вычислить минимальный хеш, и `hashnum` максимальных хешей, чтобы вычислить максимальный хеш. Возвращает кортеж из этих хешей. Функция регистрозависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). Если для двух строк минимальные или максимальные хеши одинаковы, мы считаем, что эти строки совпадают.
+
+**Синтаксис**
+``` sql
+ngramMinHashUTF8(string[, ngramsize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — размер n-грамм. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж с двумя хешами — минимальным и максимальным.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT ngramMinHashUTF8('ClickHouse') AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple──────────────────────────────────────┐
+│ (18333312859352735453,6742163577938632877) │
+└────────────────────────────────────────────┘
+```
+
+## ngramMinHashCaseInsensitiveUTF8 {#ngramminhashcaseinsensitiveutf8}
+
+Выделяет из UTF-8 строки отрезки (n-граммы) размером `ngramsize` символов и вычисляет хеш для каждой n-граммы. Использует `hashnum` минимальных хешей, чтобы вычислить минимальный хеш, и `hashnum` максимальных хешей, чтобы вычислить максимальный хеш. Возвращает кортеж из этих хешей. Функция регистро**не**зависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). Если для двух строк минимальные или максимальные хеши одинаковы, мы считаем, что эти строки совпадают.
+
+**Синтаксис**
+
+``` sql
+ngramMinHashCaseInsensitiveUTF8(string[, ngramsize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — размер n-грамм. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж с двумя хешами — минимальным и максимальным.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT ngramMinHashCaseInsensitiveUTF8('ClickHouse') AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple───────────────────────────────────────┐
+│ (12493625717655877135,13203602793651726206) │
+└─────────────────────────────────────────────┘
+```
+
+## ngramMinHashArg {#ngramminhasharg}
+
+Выделяет из ASCII строки отрезки (n-граммы) размером `ngramsize` символов и возвращает n-граммы с минимальным и максимальным хешами, вычисленными функцией [ngramMinHash](#ngramminhash) с теми же входными данными. Функция регистрозависимая.
+
+**Синтаксис**
+
+``` sql
+ngramMinHashArg(string[, ngramsize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — размер n-грамм. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж из двух кортежей, каждый из которых состоит из `hashnum` n-грамм.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT ngramMinHashArg('ClickHouse') AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple─────────────────────────────────────────────────────────────────────────┐
+│ (('ous','ick','lic','Hou','kHo','use'),('Hou','lic','ick','ous','ckH','Cli')) │
+└───────────────────────────────────────────────────────────────────────────────┘
+```
+
+## ngramMinHashArgCaseInsensitive {#ngramminhashargcaseinsensitive}
+
+Выделяет из ASCII строки отрезки (n-граммы) размером `ngramsize` символов и возвращает n-граммы с минимальным и максимальным хешами, вычисленными функцией [ngramMinHashCaseInsensitive](#ngramminhashcaseinsensitive) с теми же входными данными. Функция регистро**не**зависимая.
+
+**Синтаксис**
+
+``` sql
+ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — размер n-грамм. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж из двух кортежей, каждый из которых состоит из `hashnum` n-грамм.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT ngramMinHashArgCaseInsensitive('ClickHouse') AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple─────────────────────────────────────────────────────────────────────────┐
+│ (('ous','ick','lic','kHo','use','Cli'),('kHo','lic','ick','ous','ckH','Hou')) │
+└───────────────────────────────────────────────────────────────────────────────┘
+```
+
+## ngramMinHashArgUTF8 {#ngramminhashargutf8}
+
+Выделяет из UTF-8 строки отрезки (n-граммы) размером `ngramsize` символов и возвращает n-граммы с минимальным и максимальным хешами, вычисленными функцией [ngramMinHashUTF8](#ngramminhashutf8) с теми же входными данными. Функция регистрозависимая.
+
+**Синтаксис**
+
+``` sql
+ngramMinHashArgUTF8(string[, ngramsize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — размер n-грамм. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж из двух кортежей, каждый из которых состоит из `hashnum` n-грамм.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT ngramMinHashArgUTF8('ClickHouse') AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple─────────────────────────────────────────────────────────────────────────┐
+│ (('ous','ick','lic','Hou','kHo','use'),('kHo','Hou','lic','ick','ous','ckH')) │
+└───────────────────────────────────────────────────────────────────────────────┘
+```
+
+## ngramMinHashArgCaseInsensitiveUTF8 {#ngramminhashargcaseinsensitiveutf8}
+
+Выделяет из UTF-8 строки отрезки (n-граммы) размером `ngramsize` символов и возвращает n-граммы с минимальным и максимальным хешами, вычисленными функцией [ngramMinHashCaseInsensitiveUTF8](#ngramminhashcaseinsensitiveutf8) с теми же входными данными. Функция регистро**не**зависимая.
+
+**Синтаксис**
+
+``` sql
+ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `ngramsize` — размер n-грамм. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж из двух кортежей, каждый из которых состоит из `hashnum` n-грамм.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT ngramMinHashArgCaseInsensitiveUTF8('ClickHouse') AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple─────────────────────────────────────────────────────────────────────────┐
+│ (('ckH','ous','ick','lic','kHo','use'),('kHo','lic','ick','ous','ckH','Hou')) │
+└───────────────────────────────────────────────────────────────────────────────┘
+```
+
+## wordShingleMinHash {#wordshingleminhash}
+
+Выделяет из ASCII строки отрезки (шинглы) из `shinglesize` слов и вычисляет хеш для каждого шингла. Использует `hashnum` минимальных хешей, чтобы вычислить минимальный хеш, и `hashnum` максимальных хешей, чтобы вычислить максимальный хеш. Возвращает кортеж из этих хешей. Функция регистрозависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). Если для двух строк минимальные или максимальные хеши одинаковы, мы считаем, что эти строки совпадают.
+
+**Синтаксис**
+
+``` sql
+wordShingleMinHash(string[, shinglesize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — размер словесных шинглов. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж с двумя хешами — минимальным и максимальным.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT wordShingleMinHash('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple──────────────────────────────────────┐
+│ (16452112859864147620,5844417301642981317) │
+└────────────────────────────────────────────┘
+```
+
+## wordShingleMinHashCaseInsensitive {#wordshingleminhashcaseinsensitive}
+
+Выделяет из ASCII строки отрезки (шинглы) из `shinglesize` слов и вычисляет хеш для каждого шингла. Использует `hashnum` минимальных хешей, чтобы вычислить минимальный хеш, и `hashnum` максимальных хешей, чтобы вычислить максимальный хеш. Возвращает кортеж из этих хешей. Функция регистро**не**зависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). Если для двух строк минимальные или максимальные хеши одинаковы, мы считаем, что эти строки совпадают.
+
+**Синтаксис**
+
+``` sql
+wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — размер словесных шинглов. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж с двумя хешами — минимальным и максимальным.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT wordShingleMinHashCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple─────────────────────────────────────┐
+│ (3065874883688416519,1634050779997673240) │
+└───────────────────────────────────────────┘
+```
+
+## wordShingleMinHashUTF8 {#wordshingleminhashutf8}
+
+Выделяет из UTF-8 строки отрезки (шинглы) из `shinglesize` слов и вычисляет хеш для каждого шингла. Использует `hashnum` минимальных хешей, чтобы вычислить минимальный хеш, и `hashnum` максимальных хешей, чтобы вычислить максимальный хеш. Возвращает кортеж из этих хешей. Функция регистрозависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). Если для двух строк минимальные или максимальные хеши одинаковы, мы считаем, что эти строки совпадают.
+
+**Синтаксис**
+
+``` sql
+wordShingleMinHashUTF8(string[, shinglesize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — размер словесных шинглов. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж с двумя хешами — минимальным и максимальным.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT wordShingleMinHashUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple──────────────────────────────────────┐
+│ (16452112859864147620,5844417301642981317) │
+└────────────────────────────────────────────┘
+```
+
+## wordShingleMinHashCaseInsensitiveUTF8 {#wordshingleminhashcaseinsensitiveutf8}
+
+Выделяет из UTF-8 строки отрезки (шинглы) из `shinglesize` слов и вычисляет хеш для каждого шингла. Использует `hashnum` минимальных хешей, чтобы вычислить минимальный хеш, и `hashnum` максимальных хешей, чтобы вычислить максимальный хеш. Возвращает кортеж из этих хешей. Функция регистро**не**зависимая.
+
+Может быть использована для проверки двух строк на схожесть вместе с функцией [tupleHammingDistance](../../sql-reference/functions/tuple-functions.md#tuplehammingdistance). Если для двух строк минимальные или максимальные хеши одинаковы, мы считаем, что эти строки совпадают.
+
+**Синтаксис**
+
+``` sql
+wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — размер словесных шинглов. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж с двумя хешами — минимальным и максимальным.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([UInt64](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md)).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT wordShingleMinHashCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).') AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple─────────────────────────────────────┐
+│ (3065874883688416519,1634050779997673240) │
+└───────────────────────────────────────────┘
+```
+
+## wordShingleMinHashArg {#wordshingleminhasharg}
+
+Выделяет из ASCII строки отрезки (шинглы) из `shinglesize` слов и возвращает шинглы с минимальным и максимальным хешами, вычисленными функцией [wordShingleMinHash](#wordshingleminhash) с теми же входными данными. Функция регистрозависимая.
+
+**Синтаксис**
+
+``` sql
+wordShingleMinHashArg(string[, shinglesize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — размер словесных шинглов. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж из двух кортежей, каждый из которых состоит из `hashnum` шинглов.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT wordShingleMinHashArg('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple─────────────────────────────────────────────────────────────────┐
+│ (('OLAP','database','analytical'),('online','oriented','processing')) │
+└───────────────────────────────────────────────────────────────────────┘
+```
+
+## wordShingleMinHashArgCaseInsensitive {#wordshingleminhashargcaseinsensitive}
+
+Выделяет из ASCII строки отрезки (шинглы) из `shinglesize` слов и возвращает шинглы с минимальным и максимальным хешами, вычисленными функцией [wordShingleMinHashCaseInsensitive](#wordshingleminhashcaseinsensitive) с теми же входными данными. Функция регистро**не**зависимая.
+
+**Синтаксис**
+
+``` sql
+wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — размер словесных шинглов. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж из двух кортежей, каждый из которых состоит из `hashnum` шинглов.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT wordShingleMinHashArgCaseInsensitive('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple──────────────────────────────────────────────────────────────────┐
+│ (('queries','database','analytical'),('oriented','processing','DBMS')) │
+└────────────────────────────────────────────────────────────────────────┘
+```
+
+## wordShingleMinHashArgUTF8 {#wordshingleminhashargutf8}
+
+Выделяет из UTF-8 строки отрезки (шинглы) из `shinglesize` слов и возвращает шинглы с минимальным и максимальным хешами, вычисленными функцией [wordShingleMinHashUTF8](#wordshingleminhashutf8) с теми же входными данными. Функция регистрозависимая.
+
+**Синтаксис**
+
+``` sql
+wordShingleMinHashArgUTF8(string[, shinglesize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — размер словесных шинглов. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж из двух кортежей, каждый из которых состоит из `hashnum` шинглов.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT wordShingleMinHashArgUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple─────────────────────────────────────────────────────────────────┐
+│ (('OLAP','database','analytical'),('online','oriented','processing')) │
+└───────────────────────────────────────────────────────────────────────┘
+```
+
+## wordShingleMinHashArgCaseInsensitiveUTF8 {#wordshingleminhashargcaseinsensitiveutf8}
+
+Выделяет из UTF-8 строки отрезки (шинглы) из `shinglesize` слов и возвращает шинглы с минимальным и максимальным хешами, вычисленными функцией [wordShingleMinHashCaseInsensitiveUTF8](#wordshingleminhashcaseinsensitiveutf8) с теми же входными данными. Функция регистро**не**зависимая.
+
+**Синтаксис**
+
+``` sql
+wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum])
+```
+
+**Аргументы**
+
+- `string` — строка. [String](../../sql-reference/data-types/string.md).
+- `shinglesize` — размер словесных шинглов. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `3`. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `hashnum` — количество минимальных и максимальных хешей, которое используется при вычислении результата. Необязательный. Возможные значения: любое число от `1` до `25`. Значение по умолчанию: `6`. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемое значение**
+
+- Кортеж из двух кортежей, каждый из которых состоит из `hashnum` шинглов.
+
+Тип: [Tuple](../../sql-reference/data-types/tuple.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md)), [Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md))).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT wordShingleMinHashArgCaseInsensitiveUTF8('ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).', 1, 3) AS Tuple;
+```
+
+Результат:
+
+``` text
+┌─Tuple──────────────────────────────────────────────────────────────────┐
+│ (('queries','database','analytical'),('oriented','processing','DBMS')) │
+└────────────────────────────────────────────────────────────────────────┘
+```
diff --git a/docs/ru/sql-reference/functions/tuple-functions.md b/docs/ru/sql-reference/functions/tuple-functions.md
index a56eac27db2..381743a450b 100644
--- a/docs/ru/sql-reference/functions/tuple-functions.md
+++ b/docs/ru/sql-reference/functions/tuple-functions.md
@@ -111,3 +111,55 @@ SELECT untuple((* EXCEPT (v2, v3),)) FROM kv;
- [Tuple](../../sql-reference/data-types/tuple.md)
+## tupleHammingDistance {#tuplehammingdistance}
+
+Возвращает [расстояние Хэмминга](https://ru.wikipedia.org/wiki/%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%A5%D1%8D%D0%BC%D0%BC%D0%B8%D0%BD%D0%B3%D0%B0) между двумя кортежами одинакового размера.
+
+**Синтаксис**
+
+``` sql
+tupleHammingDistance(tuple1, tuple2)
+```
+
+**Аргументы**
+
+- `tuple1` — первый кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple2` — второй кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
+
+Кортежи должны иметь одинаковый размер и тип элементов.
+
+**Возвращаемое значение**
+
+- Расстояние Хэмминга.
+
+Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Примеры**
+
+Запрос:
+
+``` sql
+SELECT tupleHammingDistance((1, 2, 3), (3, 2, 1)) AS HammingDistance;
+```
+
+Результат:
+
+``` text
+┌─HammingDistance─┐
+│ 2 │
+└─────────────────┘
+```
+
+Может быть использована с функциями [MinHash](../../sql-reference/functions/hash-functions.md#ngramminhash) для проверки строк на совпадение:
+
+``` sql
+SELECT tupleHammingDistance(wordShingleMinHash(string), wordShingleMinHashCaseInsensitive(string)) as HammingDistance FROM (SELECT 'Clickhouse is a column-oriented database management system for online analytical processing of queries.' AS string);
+```
+
+Результат:
+
+``` text
+┌─HammingDistance─┐
+│ 2 │
+└─────────────────┘
+```
diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md
index f51859b46f6..87fc1c78cd0 100644
--- a/docs/ru/sql-reference/statements/alter/column.md
+++ b/docs/ru/sql-reference/statements/alter/column.md
@@ -117,7 +117,7 @@ MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] [AFTER name_after | F
- TTL
- Примеры изменения TTL столбца смотрите в разделе [TTL столбца](ttl.md#mergetree-column-ttl).
+ Примеры изменения TTL столбца смотрите в разделе [TTL столбца](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl).
Если указано `IF EXISTS`, запрос не возвращает ошибку, если столбца не существует.
diff --git a/docs/ru/sql-reference/table-functions/postgresql.md b/docs/ru/sql-reference/table-functions/postgresql.md
index a8ed23db8ed..66637276726 100644
--- a/docs/ru/sql-reference/table-functions/postgresql.md
+++ b/docs/ru/sql-reference/table-functions/postgresql.md
@@ -5,43 +5,46 @@ toc_title: postgresql
# postgresql {#postgresql}
-Позволяет выполнять запросы `SELECT` над данными, хранящимися на удалённом PostgreSQL сервере.
+Позволяет выполнять запросы `SELECT` и `INSERT` над таблицами удаленной БД PostgreSQL.
**Синтаксис**
+
``` sql
-postgresql('host:port', 'database', 'table', 'user', 'password')
+postgresql('host:port', 'database', 'table', 'user', 'password'[, 'schema'])
```
-**Параметры**
+**Аргументы**
- `host:port` — адрес сервера PostgreSQL.
-
- `database` — имя базы данных на удалённом сервере.
-
- `table` — имя таблицы на удалённом сервере.
-
- `user` — пользователь PostgreSQL.
-
- `password` — пароль пользователя.
-
-
-SELECT запросы на стороне PostgreSQL выполняются как `COPY (SELECT ...) TO STDOUT` внутри транзакции PostgreSQL только на чтение с коммитом после каждого `SELECT` запроса.
-
-Простые условия для `WHERE` такие как `=, !=, >, >=, <, <=, IN` исполняются на стороне PostgreSQL сервера.
-
-Все операции объединения, аггрегации, сортировки, условия `IN [ array ]` и ограничения `LIMIT` выполняются на стороне ClickHouse только после того как запрос к PostgreSQL закончился.
-
-INSERT запросы на стороне PostgreSQL выполняются как `COPY "table_name" (field1, field2, ... fieldN) FROM STDIN` внутри PostgreSQL транзакции с автоматическим коммитом после каждого `INSERT` запроса.
-
-PostgreSQL массивы конвертируются в массивы ClickHouse.
-Будьте осторожны в PostgreSQL массивы созданные как type_name[], являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы, внутри ClickHouse допустипы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы.
+- `schema` — имя схемы, если не используется схема по умолчанию. Необязательный аргумент.
**Возвращаемое значение**
-Объект таблицы с теми же столбцами, что и в исходной таблице PostgreSQL.
+Таблица с теми же столбцами, что и в исходной таблице PostgreSQL.
!!! info "Примечание"
-В запросах `INSERT` для того чтобы отличить табличную функцию `postgresql(...)` от таблицы со списком имен столбцов вы должны указывать ключевые слова `FUNCTION` или `TABLE FUNCTION`. See examples below.
+ В запросах `INSERT`, чтобы отличить табличную функцию `postgresql(...)` от таблицы со списком имен столбцов, вы должны указывать ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже.
+
+## Особенности реализации {#implementation-details}
+
+Запросы `SELECT` на стороне PostgreSQL выполняются как `COPY (SELECT ...) TO STDOUT` внутри транзакции PostgreSQL только на чтение с коммитом после каждого запроса `SELECT`.
+
+Простые условия для `WHERE`, такие как `=`, `!=`, `>`, `>=`, `<`, `<=` и `IN`, исполняются на стороне PostgreSQL сервера.
+
+Все операции объединения, агрегации, сортировки, условия `IN [ array ]` и ограничения `LIMIT` выполняются на стороне ClickHouse только после того, как запрос к PostgreSQL закончился.
+
+Запросы `INSERT` на стороне PostgreSQL выполняются как `COPY "table_name" (field1, field2, ... fieldN) FROM STDIN` внутри PostgreSQL транзакции с автоматическим коммитом после каждого запроса `INSERT`.
+
+PostgreSQL массивы конвертируются в массивы ClickHouse.
+
+!!! info "Примечание"
+ Будьте внимательны: в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. Внутри ClickHouse допустимы только многомерные массивы с одинаковым количеством измерений во всех строках таблицы.
+
+При использовании словаря PostgreSQL поддерживается приоритет реплик. Чем больше номер реплики, тем ниже ее приоритет. Наивысший приоритет у реплики с номером `0`.
**Примеры**
@@ -58,10 +61,10 @@ PRIMARY KEY (int_id));
CREATE TABLE
-postgres=# insert into test (int_id, str, "float") VALUES (1,'test',2);
+postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
INSERT 0 1
-postgresql> select * from test;
+postgresql> SELECT * FROM test;
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
@@ -80,7 +83,7 @@ SELECT * FROM postgresql('localhost:5432', 'test', 'test', 'postgresql_user', 'p
└────────┴──────────────┴───────┴──────┴────────────────┘
```
-Вставка:
+Вставка данных:
```sql
INSERT INTO TABLE FUNCTION postgresql('localhost:5432', 'test', 'test', 'postgrsql_user', 'password') (int_id, float) VALUES (2, 3);
@@ -94,7 +97,24 @@ SELECT * FROM postgresql('localhost:5432', 'test', 'test', 'postgresql_user', 'p
└────────┴──────────────┴───────┴──────┴────────────────┘
```
-**Смотрите также**
+Использование схемы, отличной от схемы по умолчанию:
-- [Движок таблиц ‘PostgreSQL’](../../sql-reference/table-functions/postgresql.md)
+```text
+postgres=# CREATE SCHEMA "nice.schema";
+
+postgres=# CREATE TABLE "nice.schema"."nice.table" (a integer);
+
+postgres=# INSERT INTO "nice.schema"."nice.table" SELECT i FROM generate_series(0, 99) as t(i)
+```
+
+```sql
+CREATE TABLE pg_table_schema_with_dots (a UInt32)
+ ENGINE PostgreSQL('localhost:5432', 'clickhouse', 'nice.table', 'postgrsql_user', 'password', 'nice.schema');
+```
+
+**См. также**
+
+- [Движок таблиц PostgreSQL](../../engines/table-engines/integrations/postgresql.md)
- [Использование PostgreSQL как источника данных для внешнего словаря](../../sql-reference/table-functions/postgresql.md#dicts-external_dicts_dict_sources-postgresql)
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/postgresql/)
diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt
index 198d9081168..697851b294b 100644
--- a/programs/server/CMakeLists.txt
+++ b/programs/server/CMakeLists.txt
@@ -42,11 +42,16 @@ if (OS_LINUX)
set(RESOURCE_OBJS ${RESOURCE_OBJS} ${RESOURCE_OBJ})
# https://stackoverflow.com/questions/14776463/compile-and-add-an-object-file-from-a-binary-with-cmake
- add_custom_command(OUTPUT ${RESOURCE_OBJ}
- COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && ${OBJCOPY_PATH} -I binary ${OBJCOPY_ARCH_OPTIONS} ${RESOURCE_FILE} ${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}
- COMMAND ${OBJCOPY_PATH} --rename-section .data=.rodata,alloc,load,readonly,data,contents
- ${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ} ${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ})
-
+ # PPC64LE fails to do this with objcopy, use ld or lld instead
+ if (ARCH_PPC64LE)
+ add_custom_command(OUTPUT ${RESOURCE_OBJ}
+ COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && ${CMAKE_LINKER} -m elf64lppc -r -b binary -o ${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ} ${RESOURCE_FILE})
+ else()
+ add_custom_command(OUTPUT ${RESOURCE_OBJ}
+ COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && ${OBJCOPY_PATH} -I binary ${OBJCOPY_ARCH_OPTIONS} ${RESOURCE_FILE} ${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}
+ COMMAND ${OBJCOPY_PATH} --rename-section .data=.rodata,alloc,load,readonly,data,contents
+ ${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ} ${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ})
+ endif()
set_source_files_properties(${RESOURCE_OBJ} PROPERTIES EXTERNAL_OBJECT true GENERATED true)
endforeach(RESOURCE_FILE)
diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp
index a163ceba4a2..8455ef3117e 100644
--- a/src/Client/HedgedConnections.cpp
+++ b/src/Client/HedgedConnections.cpp
@@ -521,14 +521,17 @@ void HedgedConnections::processNewReplicaState(HedgedConnectionsFactory::State s
void HedgedConnections::finishProcessReplica(ReplicaState & replica, bool disconnect)
{
+ /// It's important to remove file descriptor from epoll exactly before cancelling packet_receiver,
+ /// because otherwise another thread can try to receive a packet, get this file descriptor
+ /// from epoll and resume cancelled packet_receiver.
+ epoll.remove(replica.packet_receiver->getFileDescriptor());
+ epoll.remove(replica.change_replica_timeout.getDescriptor());
+
replica.packet_receiver->cancel();
replica.change_replica_timeout.reset();
- epoll.remove(replica.packet_receiver->getFileDescriptor());
--offset_states[fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset].active_connection_count;
fd_to_replica_location.erase(replica.packet_receiver->getFileDescriptor());
-
- epoll.remove(replica.change_replica_timeout.getDescriptor());
timeout_fd_to_replica_location.erase(replica.change_replica_timeout.getDescriptor());
--active_connection_count;
diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h
index 99dc5414107..c3cd09eccb2 100644
--- a/src/Common/HashTable/HashMap.h
+++ b/src/Common/HashTable/HashMap.h
@@ -48,7 +48,7 @@ struct HashMapCell
value_type value;
- HashMapCell() {}
+ HashMapCell() = default;
HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {}
HashMapCell(const value_type & value_, const State &) : value(value_) {}
@@ -114,8 +114,39 @@ struct HashMapCell
static void move(HashMapCell * /* old_location */, HashMapCell * /* new_location */) {}
+ template
+ auto & get() & {
+ if constexpr (I == 0) return value.first;
+ else if constexpr (I == 1) return value.second;
+ }
+
+ template
+ auto const & get() const & {
+ if constexpr (I == 0) return value.first;
+ else if constexpr (I == 1) return value.second;
+ }
+
+ template
+ auto && get() && {
+ if constexpr (I == 0) return std::move(value.first);
+ else if constexpr (I == 1) return std::move(value.second);
+ }
+
};
+namespace std
+{
+
+ template
+ struct tuple_size> : std::integral_constant { };
+
+ template
+ struct tuple_element<0, HashMapCell> { using type = Key; };
+
+ template
+ struct tuple_element<1, HashMapCell> { using type = TMapped; };
+}
+
template
struct HashMapCellWithSavedHash : public HashMapCell
{
@@ -227,6 +258,19 @@ public:
}
};
+namespace std
+{
+
+ template
+ struct tuple_size> : std::integral_constant { };
+
+ template
+ struct tuple_element<0, HashMapCellWithSavedHash> { using type = Key; };
+
+ template
+ struct tuple_element<1, HashMapCellWithSavedHash> { using type = TMapped; };
+}
+
template <
typename Key,
diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h
index 57ad3d46177..b1042332cfa 100644
--- a/src/Common/PODArray.h
+++ b/src/Common/PODArray.h
@@ -530,6 +530,31 @@ public:
this->c_end += bytes_to_copy;
}
+ template
+ void insertFromItself(iterator from_begin, iterator from_end, TAllocatorParams && ... allocator_params)
+ {
+ static_assert(memcpy_can_be_used_for_assignment, std::decay_t>);
+
+ /// Convert iterators to indexes because reserve can invalidate iterators
+ size_t start_index = from_begin - begin();
+ size_t end_index = from_end - begin();
+ size_t copy_size = end_index - start_index;
+
+ assert(start_index <= end_index);
+
+ size_t required_capacity = this->size() + copy_size;
+ if (required_capacity > this->capacity())
+ this->reserve(roundUpToPowerOfTwoOrZero(required_capacity), std::forward(allocator_params)...);
+
+ size_t bytes_to_copy = this->byte_size(copy_size);
+ if (bytes_to_copy)
+ {
+ auto begin = this->c_start + this->byte_size(start_index);
+ memcpy(this->c_end, reinterpret_cast(&*begin), bytes_to_copy);
+ this->c_end += bytes_to_copy;
+ }
+ }
+
template
void insert_assume_reserved(It1 from_begin, It2 from_end)
{
diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp
index c4cf7f11e68..9e81cdddbda 100644
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@@ -35,7 +35,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext
else
error << "Address: " << info.si_addr;
-#if defined(__x86_64__) && !defined(__FreeBSD__) && !defined(__APPLE__) && !defined(__arm__)
+#if defined(__x86_64__) && !defined(__FreeBSD__) && !defined(__APPLE__) && !defined(__arm__) && !defined(__powerpc__)
auto err_mask = context.uc_mcontext.gregs[REG_ERR];
if ((err_mask & 0x02))
error << " Access: write.";
@@ -186,6 +186,8 @@ static void * getCallerAddress(const ucontext_t & context)
# endif
#elif defined(__aarch64__)
return reinterpret_cast(context.uc_mcontext.pc);
+#elif defined(__powerpc64__)
+ return reinterpret_cast(context.uc_mcontext.gp_regs[PT_NIP]);
#else
return nullptr;
#endif
diff --git a/src/Common/tests/gtest_pod_array.cpp b/src/Common/tests/gtest_pod_array.cpp
index 63cf7026757..9cc77b88195 100644
--- a/src/Common/tests/gtest_pod_array.cpp
+++ b/src/Common/tests/gtest_pod_array.cpp
@@ -33,6 +33,19 @@ TEST(Common, PODArrayInsert)
EXPECT_EQ(str, std::string(chars.data(), chars.size()));
}
+TEST(Common, PODArrayInsertFromItself)
+{
+ {
+ PaddedPODArray array { 1 };
+
+ for (size_t i = 0; i < 3; ++i)
+ array.insertFromItself(array.begin(), array.end());
+
+ PaddedPODArray expected {1,1,1,1,1,1,1,1};
+ ASSERT_EQ(array,expected);
+ }
+}
+
TEST(Common, PODPushBackRawMany)
{
PODArray chars;
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 0b47cf1f2f7..045433dc895 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -228,6 +228,7 @@ class IColumn;
M(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \
M(Seconds, http_send_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP send timeout", 0) \
M(Seconds, http_receive_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP receive timeout", 0) \
+ M(UInt64, http_max_uri_size, 16384, "Maximum URI length of HTTP request", 0) \
M(Bool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown", 0) \
M(Bool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.", 0) \
M(Bool, joined_subquery_requires_alias, true, "Force joined subqueries and table functions to have aliases for correct name qualification.", 0) \
@@ -546,7 +547,7 @@ struct Settings : public BaseSettings
{
/// For initialization from empty initializer-list to be "value initialization", not "aggregate initialization" in C++14.
/// http://en.cppreference.com/w/cpp/language/aggregate_initialization
- Settings() {}
+ Settings() = default;
/** Set multiple settings from "profile" (in server configuration file (users.xml), profiles contain groups of multiple settings).
* The profile can also be set using the `set` functions, like the profile setting.
diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp
index e0078da57b7..b4222a7e349 100644
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@@ -567,7 +567,7 @@ void DatabaseAtomic::renameDictionaryInMemoryUnlocked(const StorageID & old_name
auto result = external_loader.getLoadResult(toString(old_name.uuid));
if (!result.object)
return;
- const auto & dict = dynamic_cast(*result.object);
+ const auto & dict = dynamic_cast(*result.object);
dict.updateDictionaryName(new_name);
}
void DatabaseAtomic::waitDetachedTableNotInUse(const UUID & uuid)
diff --git a/src/Databases/DatabaseWithDictionaries.cpp b/src/Databases/DatabaseWithDictionaries.cpp
index d92f0f1897e..55b04f27c58 100644
--- a/src/Databases/DatabaseWithDictionaries.cpp
+++ b/src/Databases/DatabaseWithDictionaries.cpp
@@ -49,7 +49,7 @@ void DatabaseWithDictionaries::attachDictionary(const String & dictionary_name,
/// Attach the dictionary as table too.
try
{
- /// TODO Make StorageDictionary an owner of IDictionaryBase objects.
+ /// TODO Make StorageDictionary an owner of IDictionary objects.
/// All DDL operations with dictionaries will work with StorageDictionary table,
/// and StorageDictionary will be responsible for loading of DDL dictionaries.
/// ExternalLoaderDatabaseConfigRepository and other hacks related to ExternalLoader
diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index eedf4dd3d87..535e862af40 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -13,7 +13,9 @@
#include
#include
#include
+
#include
+#include
namespace ProfileEvents
{
@@ -39,7 +41,6 @@ namespace DB
namespace ErrorCodes
{
extern const int CACHE_DICTIONARY_UPDATE_FAIL;
- extern const int TYPE_MISMATCH;
extern const int UNSUPPORTED_METHOD;
}
@@ -70,8 +71,6 @@ CacheDictionary::CacheDictionary(
{
if (!source_ptr->supportsSelectiveLoad())
throw Exception{full_name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD};
-
- setupHierarchicalAttribute();
}
template
@@ -120,164 +119,6 @@ const IDictionarySource * CacheDictionary::getSource() cons
return source_ptr.get();
}
-template
-void CacheDictionary::toParent(const PaddedPODArray & ids [[maybe_unused]], PaddedPODArray & out [[maybe_unused]]) const
-{
- if constexpr (dictionary_key_type == DictionaryKeyType::simple)
- {
- /// Run update on requested keys before fetch from storage
- const auto & attribute_name = hierarchical_attribute->name;
-
- auto result_type = std::make_shared();
- auto input_column = result_type->createColumn();
- auto & input_column_typed = assert_cast &>(*input_column);
- auto & data = input_column_typed.getData();
- data.insert(ids.begin(), ids.end());
-
- auto column = getColumn({attribute_name}, result_type, {std::move(input_column)}, {result_type}, {nullptr});
- const auto & result_column_typed = assert_cast &>(*column);
- const auto & result_data = result_column_typed.getData();
-
- out.assign(result_data);
- }
- else
- throw Exception("Hierarchy is not supported for complex key CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD);
-}
-
-
-/// Allow to use single value in same way as array.
-static inline UInt64 getAt(const PaddedPODArray & arr, const size_t idx)
-{
- return arr[idx];
-}
-static inline UInt64 getAt(const UInt64 & value, const size_t)
-{
- return value;
-}
-
-template
-template
-void CacheDictionary::isInImpl(const PaddedPODArray & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const
-{
- /// Transform all children to parents until ancestor id or null_value will be reached.
-
- size_t out_size = out.size();
- memset(out.data(), 0xFF, out_size); /// 0xFF means "not calculated"
-
- const auto null_value = hierarchical_attribute->null_value.get();
-
- PaddedPODArray children(out_size, 0);
- PaddedPODArray parents(child_ids.begin(), child_ids.end());
-
- for (size_t i = 0; i < DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH; ++i)
- {
- size_t out_idx = 0;
- size_t parents_idx = 0;
- size_t new_children_idx = 0;
-
- while (out_idx < out_size)
- {
- /// Already calculated
- if (out[out_idx] != 0xFF)
- {
- ++out_idx;
- continue;
- }
-
- /// No parent
- if (parents[parents_idx] == null_value)
- {
- out[out_idx] = 0;
- }
- /// Found ancestor
- else if (parents[parents_idx] == getAt(ancestor_ids, parents_idx))
- {
- out[out_idx] = 1;
- }
- /// Loop detected
- else if (children[new_children_idx] == parents[parents_idx])
- {
- out[out_idx] = 1;
- }
- /// Found intermediate parent, add this value to search at next loop iteration
- else
- {
- children[new_children_idx] = parents[parents_idx];
- ++new_children_idx;
- }
-
- ++out_idx;
- ++parents_idx;
- }
-
- if (new_children_idx == 0)
- break;
-
- /// Transform all children to its parents.
- children.resize(new_children_idx);
- parents.resize(new_children_idx);
-
- toParent(children, parents);
- }
-}
-
-template
-void CacheDictionary::isInVectorVector(
- const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const
-{
- isInImpl(child_ids, ancestor_ids, out);
-}
-
-template
-void CacheDictionary::isInVectorConstant(const PaddedPODArray & child_ids, const UInt64 ancestor_id, PaddedPODArray & out) const
-{
- isInImpl(child_ids, ancestor_id, out);
-}
-
-template
-void CacheDictionary::isInConstantVector(const UInt64 child_id, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const
-{
- /// Special case with single child value.
-
- const auto null_value = hierarchical_attribute->null_value.get();
-
- PaddedPODArray child(1, child_id);
- PaddedPODArray parent(1);
- std::vector ancestors(1, child_id);
-
- /// Iteratively find all ancestors for child.
- for (size_t i = 0; i < DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH; ++i)
- {
- toParent(child, parent);
-
- if (parent[0] == null_value)
- break;
-
- child[0] = parent[0];
- ancestors.push_back(parent[0]);
- }
-
- /// Assuming short hierarchy, so linear search is Ok.
- for (size_t i = 0, out_size = out.size(); i < out_size; ++i)
- out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end();
-}
-
-template
-void CacheDictionary::setupHierarchicalAttribute()
-{
- /// TODO: Move this to DictionaryStructure
- for (const auto & attribute : dict_struct.attributes)
- {
- if (attribute.hierarchical)
- {
- hierarchical_attribute = &attribute;
-
- if (attribute.underlying_type != AttributeUnderlyingType::utUInt64)
- throw Exception{full_name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
- }
- }
-}
-
template
ColumnPtr CacheDictionary::getColumn(
const std::string & attribute_name,
@@ -296,23 +137,6 @@ Columns CacheDictionary::getColumns(
const Columns & key_columns,
const DataTypes & key_types,
const Columns & default_values_columns) const
-{
- if (dictionary_key_type == DictionaryKeyType::complex)
- dict_struct.validateKeyTypes(key_types);
-
- Arena complex_keys_arena;
- DictionaryKeysExtractor extractor(key_columns, complex_keys_arena);
- auto & keys = extractor.getKeys();
-
- return getColumnsImpl(attribute_names, key_columns, keys, default_values_columns);
-}
-
-template
-Columns CacheDictionary::getColumnsImpl(
- const Strings & attribute_names,
- const Columns & key_columns,
- const PaddedPODArray & keys,
- const Columns & default_values_columns) const
{
/**
* Flow of getColumsImpl
@@ -328,6 +152,13 @@ Columns CacheDictionary::getColumnsImpl(
* use default value.
*/
+ if (dictionary_key_type == DictionaryKeyType::complex)
+ dict_struct.validateKeyTypes(key_types);
+
+ DictionaryKeysArenaHolder arena_holder;
+ DictionaryKeysExtractor extractor(key_columns, arena_holder.getComplexKeyArena());
+ auto keys = extractor.extractAllKeys();
+
DictionaryStorageFetchRequest request(dict_struct, attribute_names, default_values_columns);
FetchResult result_of_fetch_from_storage;
@@ -440,9 +271,10 @@ ColumnUInt8::Ptr CacheDictionary::hasKeys(const Columns & k
if (dictionary_key_type == DictionaryKeyType::complex)
dict_struct.validateKeyTypes(key_types);
- Arena complex_keys_arena;
- DictionaryKeysExtractor extractor(key_columns, complex_keys_arena);
- const auto & keys = extractor.getKeys();
+
+ DictionaryKeysArenaHolder arena_holder;
+ DictionaryKeysExtractor extractor(key_columns, arena_holder.getComplexKeyArena());
+ const auto keys = extractor.extractAllKeys();
/// We make empty request just to fetch if keys exists
DictionaryStorageFetchRequest request(dict_struct, {}, {});
@@ -526,6 +358,37 @@ ColumnUInt8::Ptr CacheDictionary::hasKeys(const Columns & k
return result;
}
+template
+ColumnPtr CacheDictionary::getHierarchy(
+ ColumnPtr key_column [[maybe_unused]],
+ const DataTypePtr & key_type [[maybe_unused]]) const
+{
+ if (dictionary_key_type == DictionaryKeyType::simple)
+ {
+ auto result = getKeysHierarchyDefaultImplementation(this, key_column, key_type);
+ query_count.fetch_add(key_column->size(), std::memory_order_relaxed);
+ return result;
+ }
+ else
+ return nullptr;
+}
+
+template
+ColumnUInt8::Ptr CacheDictionary::isInHierarchy(
+ ColumnPtr key_column [[maybe_unused]],
+ ColumnPtr in_key_column [[maybe_unused]],
+ const DataTypePtr & key_type [[maybe_unused]]) const
+{
+ if (dictionary_key_type == DictionaryKeyType::simple)
+ {
+ auto result = getKeysIsInHierarchyDefaultImplementation(this, key_column, in_key_column, key_type);
+ query_count.fetch_add(key_column->size(), std::memory_order_relaxed);
+ return result;
+ }
+ else
+ return nullptr;
+}
+
template
MutableColumns CacheDictionary::aggregateColumnsInOrderOfKeys(
const PaddedPODArray & keys,
@@ -618,19 +481,18 @@ MutableColumns CacheDictionary::aggregateColumns(
template
BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
- using BlockInputStreamType = DictionaryBlockInputStream;
- std::shared_ptr stream;
+ std::shared_ptr stream;
{
/// Write lock on storage
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
if constexpr (dictionary_key_type == DictionaryKeyType::simple)
- stream = std::make_shared(shared_from_this(), max_block_size, cache_storage_ptr->getCachedSimpleKeys(), column_names);
+ stream = std::make_shared(shared_from_this(), max_block_size, cache_storage_ptr->getCachedSimpleKeys(), column_names);
else
{
auto keys = cache_storage_ptr->getCachedComplexKeys();
- stream = std::make_shared(shared_from_this(), max_block_size, keys, column_names);
+ stream = std::make_shared(shared_from_this(), max_block_size, keys, column_names);
}
}
@@ -660,14 +522,20 @@ void CacheDictionary::update(CacheDictionaryUpdateUnitPtr requested_keys_extractor(update_unit_ptr->key_columns, update_unit_ptr->complex_key_arena);
- const auto & requested_keys = requested_keys_extractor.getKeys();
+ Arena * complex_key_arena = update_unit_ptr->complex_keys_arena_holder.getComplexKeyArena();
+ DictionaryKeysExtractor requested_keys_extractor(update_unit_ptr->key_columns, complex_key_arena);
+ auto requested_keys = requested_keys_extractor.extractAllKeys();
HashSet not_found_keys;
std::vector requested_keys_vector;
std::vector requested_complex_key_rows;
+ if constexpr (dictionary_key_type == DictionaryKeyType::simple)
+ requested_keys_vector.reserve(requested_keys.size());
+ else
+ requested_complex_key_rows.reserve(requested_keys.size());
+
auto & key_index_to_state_from_storage = update_unit_ptr->key_index_to_state;
for (size_t i = 0; i < key_index_to_state_from_storage.size(); ++i)
@@ -727,8 +595,8 @@ void CacheDictionary::update(CacheDictionaryUpdateUnitPtr keys_extractor(key_columns, update_unit_ptr->complex_key_arena);
- const auto & keys_extracted_from_block = keys_extractor.getKeys();
+ DictionaryKeysExtractor keys_extractor(key_columns, complex_key_arena);
+ auto keys_extracted_from_block = keys_extractor.extractAllKeys();
for (size_t index_of_attribute = 0; index_of_attribute < fetched_columns_during_update.size(); ++index_of_attribute)
{
@@ -740,6 +608,7 @@ void CacheDictionary::update(CacheDictionaryUpdateUnitPtrrequested_keys_to_fetched_columns_during_update_index[fetched_key_from_source] = found_keys_size;
found_keys_in_source.emplace_back(fetched_key_from_source);
diff --git a/src/Dictionaries/CacheDictionary.h b/src/Dictionaries/CacheDictionary.h
index 1192db73737..62cd509d006 100644
--- a/src/Dictionaries/CacheDictionary.h
+++ b/src/Dictionaries/CacheDictionary.h
@@ -130,33 +130,18 @@ public:
std::exception_ptr getLastException() const override;
- bool hasHierarchy() const override { return dictionary_key_type == DictionaryKeyType::simple && hierarchical_attribute; }
+ bool hasHierarchy() const override { return dictionary_key_type == DictionaryKeyType::simple && dict_struct.hierarchical_attribute_index.has_value(); }
- void toParent(const PaddedPODArray & ids, PaddedPODArray & out) const override;
+ ColumnPtr getHierarchy(ColumnPtr key_column, const DataTypePtr & key_type) const override;
- void isInVectorVector(
- const PaddedPODArray & child_ids,
- const PaddedPODArray & ancestor_ids,
- PaddedPODArray & out) const override;
-
- void isInVectorConstant(
- const PaddedPODArray & child_ids,
- const UInt64 ancestor_id, PaddedPODArray & out) const override;
-
- void isInConstantVector(
- const UInt64 child_id,
- const PaddedPODArray & ancestor_ids,
- PaddedPODArray & out) const override;
+ ColumnUInt8::Ptr isInHierarchy(
+ ColumnPtr key_column,
+ ColumnPtr in_key_column,
+ const DataTypePtr & key_type) const override;
private:
using FetchResult = std::conditional_t;
- Columns getColumnsImpl(
- const Strings & attribute_names,
- const Columns & key_columns,
- const PaddedPODArray & keys,
- const Columns & default_values_columns) const;
-
static MutableColumns aggregateColumnsInOrderOfKeys(
const PaddedPODArray & keys,
const DictionaryStorageFetchRequest & request,
@@ -171,8 +156,6 @@ private:
const MutableColumns & fetched_columns_during_update,
const HashMap & found_keys_to_fetched_columns_during_update_index);
- void setupHierarchicalAttribute();
-
void update(CacheDictionaryUpdateUnitPtr update_unit_ptr);
/// Update dictionary source pointer if required and return it. Thread safe.
@@ -193,9 +176,6 @@ private:
return source_ptr;
}
- template
- void isInImpl(const PaddedPODArray & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const;
-
const DictionaryStructure dict_struct;
/// Dictionary source should be used with mutex
@@ -218,8 +198,6 @@ private:
/// readers. Surprisingly this lock is also used for last_exception pointer.
mutable std::shared_mutex rw_lock;
- const DictionaryAttribute * hierarchical_attribute = nullptr;
-
mutable std::exception_ptr last_exception;
mutable std::atomic error_count {0};
mutable std::atomic backoff_end_time{std::chrono::system_clock::time_point{}};
diff --git a/src/Dictionaries/CacheDictionaryUpdateQueue.h b/src/Dictionaries/CacheDictionaryUpdateQueue.h
index 2e636af6db6..3d27a157752 100644
--- a/src/Dictionaries/CacheDictionaryUpdateQueue.h
+++ b/src/Dictionaries/CacheDictionaryUpdateQueue.h
@@ -66,8 +66,9 @@ public:
HashMap requested_keys_to_fetched_columns_during_update_index;
MutableColumns fetched_columns_during_update;
+
/// Complex keys are serialized in this arena
- Arena complex_key_arena;
+ DictionaryKeysArenaHolder complex_keys_arena_holder;
private:
template
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
deleted file mode 100644
index 4086082e66d..00000000000
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ /dev/null
@@ -1,594 +0,0 @@
-#include "ComplexKeyHashedDictionary.h"
-#include