Merge branch 'master' into fix-kafka-again

2024-11-10 09:32:06 +00:00 · 2019-04-16 14:00:48 +03:00 · 2019-04-16 14:00:48 +03:00 · dffe0eba40
commit dffe0eba40
parent a4dfa0d58d 8cd18c0723
378 changed files with 11940 additions and 2448 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -317,6 +317,7 @@ include (cmake/find_hdfs3.cmake) # uses protobuf
 include (cmake/find_consistent-hashing.cmake)
 include (cmake/find_base64.cmake)
 include (cmake/find_hyperscan.cmake)
+include (cmake/find_lfalloc.cmake)
 find_contrib_lib(cityhash)
 find_contrib_lib(farmhash)
 find_contrib_lib(metrohash)
--- a/README.md
+++ b/README.md
@ -10,7 +10,3 @@ ClickHouse is an open-source column-oriented database management system that all
 * [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announces and reports about events.
 * [Contacts](https://clickhouse.yandex/#contacts) can help to get your questions answered if there are any.
 * You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person.
-
-## Upcoming Events
-
-* [ClickHouse Community Meetup](https://www.eventbrite.com/e/clickhouse-meetup-in-madrid-registration-55376746339) in Madrid on April 2.
--- a/cmake/find_lfalloc.cmake
+++ b/cmake/find_lfalloc.cmake
@ -0,0 +1,9 @@
+# Enable lfalloc unless this is a sanitizer build or the target is ARM, 32-bit or PPC64LE.
+if (NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE)
+    # Stop configuration early when the lfalloc submodule sources are not checked out.
+    if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lfalloc/src/lf_allocX64.h")
+        message (FATAL_ERROR "submodule contrib/lfalloc is missing. to fix try run: \n git submodule update --init --recursive")
+    endif()
+    # Variables set for the rest of the build: feature switches and the include path.
+    set (USE_LFALLOC 1)
+    set (USE_LFALLOC_RANDOM_HINT 1)
+    set (LFALLOC_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lfalloc/src)
+    message (STATUS "Using lfalloc=${USE_LFALLOC}: ${LFALLOC_INCLUDE_DIR}")
+endif ()
--- a/cmake/find_poco.cmake
+++ b/cmake/find_poco.cmake
@ -36,6 +36,8 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY)
    set (ENABLE_DATA_SQLITE 0 CACHE BOOL "")
    set (ENABLE_DATA_MYSQL 0 CACHE BOOL "")
    set (ENABLE_DATA_POSTGRESQL 0 CACHE BOOL "")
+    set (ENABLE_ENCODINGS 0 CACHE BOOL "")
+
    # new after 2.0.0:
    set (POCO_ENABLE_ZIP 0 CACHE BOOL "")
    set (POCO_ENABLE_PAGECOMPILER 0 CACHE BOOL "")
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@ -284,6 +284,7 @@ endif ()

 if (USE_INTERNAL_BROTLI_LIBRARY)
    add_subdirectory(brotli-cmake)
+    target_compile_definitions(brotli PRIVATE BROTLI_BUILD_PORTABLE=1)
 endif ()

 if (USE_INTERNAL_PROTOBUF_LIBRARY)
--- a/contrib/hyperscan
+++ b/contrib/hyperscan
@ -1 +1 @@
-Subproject commit 05dab0efee80be405aad5f74721b692b6889b75e
+Subproject commit 05b0f9064cca4bd55548dedb0a32ed9461146c1e
--- a/contrib/lfalloc/src/lf_allocX64.h
+++ b/contrib/lfalloc/src/lf_allocX64.h
--- a/contrib/lfalloc/src/lfmalloc.h
+++ b/contrib/lfalloc/src/lfmalloc.h
@ -0,0 +1,23 @@
+#pragma once
+
+#include <string.h>
+#include <stdlib.h>
+#include "util/system/compiler.h"
+
+// Allocator-wide corruption flag, abort helper and allocation header record.
+namespace NMalloc {
+    // Set to true by AbortFromCorruptedAllocator() immediately before abort() is called.
+    volatile inline bool IsAllocatorCorrupted = false;
+
+    // Records that the allocator is corrupted, then terminates the process with abort().
+    static inline void AbortFromCorruptedAllocator() {
+        IsAllocatorCorrupted = true;
+        abort();
+    }
+
+    // Header record: a block pointer plus a size field that also carries a signature.
+    struct TAllocHeader {
+        void* Block;
+        size_t AllocSize; // written by Encode() as `size | signature`
+        // Stores `block` and combines `size` with `signature` using bitwise OR.
+        // NOTE(review): presumably callers keep the signature bits disjoint from the size bits — confirm.
+        void Y_FORCE_INLINE Encode(void* block, size_t size, size_t signature) {
+            Block = block;
+            AllocSize = size | signature;
+        }
+    };
+}
--- a/contrib/lfalloc/src/util/README.md
+++ b/contrib/lfalloc/src/util/README.md
@ -0,0 +1,33 @@
+The style guide for the util folder is a stricter version of the general style guide (mostly in terms of ambiguity resolution).
+
+ * all {} must be in K&R style
+ * &, * tied closer to a type, not to variable
+ * always use `using` not `typedef`
+ * even a single line block must be in braces {}:
+   ```
+   if (A) {
+       B();
+   }
+   ```
+ * _ at the end of private data member of a class - `First_`, `Second_`
+ * every .h file must be accompanied by a corresponding .cpp file, to avoid leakage and to check that the header is self-contained
+ * prohibited to use `printf`-like functions
+
+
+Things declared in the general style guide, which sometimes are missed:
+
+ * `template <`, not `template<`
+ * `noexcept`, not `throw ()` nor `throw()`, not required for destructors
+ * indents inside `namespace` same as inside `class`
+
+
+Requirements for a new code (and for corrections in an old code which involves change of behaviour) in util:
+
+ * presence of UNIT-tests
+ * presence of comments in Doxygen style
+ * accessors without Get prefix (`Length()`, but not `GetLength()`)
+
+This guide is not mandatory, since the general style guide still applies.
+Nevertheless, if it is not followed, the next `ya style .` run in the util folder will undeservedly change the recorded authorship of some lines of code.
+
+Thus before a commit it is recommended to run `ya style .` in the util folder.
--- a/contrib/lfalloc/src/util/system/atomic.h
+++ b/contrib/lfalloc/src/util/system/atomic.h
@ -0,0 +1,51 @@
+#pragma once
+
+#include "defaults.h"
+
+// Value type used by every atomic operation in this header (signed, pointer-sized integer).
+using TAtomicBase = intptr_t;
+// Storage type of an atomic cell: a volatile TAtomicBase.
+using TAtomic = volatile TAtomicBase;
+
+#if defined(__GNUC__)
+#include "atomic_gcc.h"
+#elif defined(_MSC_VER)
+#include "atomic_win.h"
+#else
+#error unsupported platform
+#endif
+
+// Fallback: if the platform header included above did not define ATOMIC_COMPILER_BARRIER,
+// define it to expand to nothing.
+#if !defined(ATOMIC_COMPILER_BARRIER)
+#define ATOMIC_COMPILER_BARRIER()
+#endif
+
+// Subtracts v from a by calling AtomicAdd with -v; returns whatever AtomicAdd returns.
+static inline TAtomicBase AtomicSub(TAtomic& a, TAtomicBase v) {
+    return AtomicAdd(a, -v);
+}
+
+// Subtracts v from a by calling AtomicGetAndAdd with -v; returns whatever AtomicGetAndAdd returns.
+static inline TAtomicBase AtomicGetAndSub(TAtomic& a, TAtomicBase v) {
+    return AtomicGetAndAdd(a, -v);
+}
+
+// Generic AtomicGet/AtomicSet, compiled only when the platform header defines USE_GENERIC_SETGET.
+// They are plain reads and writes of the volatile cell.
+#if defined(USE_GENERIC_SETGET)
+// Returns the current value of a.
+static inline TAtomicBase AtomicGet(const TAtomic& a) {
+    return a;
+}
+
+// Assigns v to a.
+static inline void AtomicSet(TAtomic& a, TAtomicBase v) {
+    a = v;
+}
+#endif
+
+// Attempts to acquire the lock word *a with a single CAS from 0 (compare) to 1 (exchange).
+// Returns the result of AtomicCas.
+static inline bool AtomicTryLock(TAtomic* a) {
+    return AtomicCas(a, 1, 0);
+}
+
+// Reads *a first and calls AtomicTryLock only when the value read is 0,
+// so the CAS is skipped when the lock word is already non-zero.
+static inline bool AtomicTryAndTryLock(TAtomic* a) {
+    return (AtomicGet(*a) == 0) && AtomicTryLock(a);
+}
+
+// Releases the lock word: issues ATOMIC_COMPILER_BARRIER(), then stores 0 into *a with AtomicSet.
+static inline void AtomicUnlock(TAtomic* a) {
+    ATOMIC_COMPILER_BARRIER();
+    AtomicSet(*a, 0);
+}
+
+#include "atomic_ops.h"
--- a/contrib/lfalloc/src/util/system/atomic_gcc.h
+++ b/contrib/lfalloc/src/util/system/atomic_gcc.h
@ -0,0 +1,90 @@
+#pragma once
+
+// Compiler barrier: an empty volatile asm statement with a "memory" clobber and no operands.
+#define ATOMIC_COMPILER_BARRIER() __asm__ __volatile__("" \
+                                                       :  \
+                                                       :  \
+                                                       : "memory")
+
+// Loads the value of a with acquire ordering and returns it.
+// With _arm64_ defined the load is an explicit `ldar` instruction;
+// otherwise it is __atomic_load with __ATOMIC_ACQUIRE.
+static inline TAtomicBase AtomicGet(const TAtomic& a) {
+    TAtomicBase tmp;
+#if defined(_arm64_)
+    __asm__ __volatile__(
+        "ldar %x[value], %[ptr]  \n\t"
+        : [value] "=r"(tmp)
+        : [ptr] "Q"(a)
+        : "memory");
+#else
+    __atomic_load(&a, &tmp, __ATOMIC_ACQUIRE);
+#endif
+    return tmp;
+}
+
+// Stores v into a with release ordering.
+// With _arm64_ defined the store is an explicit `stlr` instruction;
+// otherwise it is __atomic_store with __ATOMIC_RELEASE.
+static inline void AtomicSet(TAtomic& a, TAtomicBase v) {
+#if defined(_arm64_)
+    __asm__ __volatile__(
+        "stlr %x[value], %[ptr]  \n\t"
+        : [ptr] "=Q"(a)
+        : [value] "r"(v)
+        : "memory");
+#else
+    __atomic_store(&a, &v, __ATOMIC_RELEASE);
+#endif
+}
+
+// Adds 1 to p (seq_cst) and returns the value after the addition.
+static inline intptr_t AtomicIncrement(TAtomic& p) {
+    return __atomic_add_fetch(&p, 1, __ATOMIC_SEQ_CST);
+}
+
+// Adds 1 to p (seq_cst) and returns the value p held before the addition.
+static inline intptr_t AtomicGetAndIncrement(TAtomic& p) {
+    return __atomic_fetch_add(&p, 1, __ATOMIC_SEQ_CST);
+}
+
+// Subtracts 1 from p (seq_cst) and returns the value after the subtraction.
+static inline intptr_t AtomicDecrement(TAtomic& p) {
+    return __atomic_sub_fetch(&p, 1, __ATOMIC_SEQ_CST);
+}
+
+// Subtracts 1 from p (seq_cst) and returns the value p held before the subtraction.
+static inline intptr_t AtomicGetAndDecrement(TAtomic& p) {
+    return __atomic_fetch_sub(&p, 1, __ATOMIC_SEQ_CST);
+}
+
+// Adds v to p (seq_cst) and returns the value after the addition.
+static inline intptr_t AtomicAdd(TAtomic& p, intptr_t v) {
+    return __atomic_add_fetch(&p, v, __ATOMIC_SEQ_CST);
+}
+
+// Adds v to p (seq_cst) and returns the value p held before the addition.
+static inline intptr_t AtomicGetAndAdd(TAtomic& p, intptr_t v) {
+    return __atomic_fetch_add(&p, v, __ATOMIC_SEQ_CST);
+}
+
+// Atomically replaces *p with v (seq_cst) and returns the value *p held before the exchange.
+static inline intptr_t AtomicSwap(TAtomic* p, intptr_t v) {
+    (void)p; // disable strange 'parameter set but not used' warning on gcc
+    intptr_t ret;
+    __atomic_exchange(p, &v, &ret, __ATOMIC_SEQ_CST);
+    return ret;
+}
+
+// Strong compare-and-swap (the `weak` argument is false), seq_cst on both success and failure:
+// if *a equals `compare`, stores `exchange` into *a. Returns true when the store happened.
+static inline bool AtomicCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
+    (void)a; // disable strange 'parameter set but not used' warning on gcc
+    return __atomic_compare_exchange(a, &compare, &exchange, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+}
+
+// Same compare-and-swap as AtomicCas, but returns the value observed in *a instead of a bool.
+// On failure the builtin writes the current value of *a into the local `compare`;
+// on success `compare` already equals the value that was in *a.
+static inline intptr_t AtomicGetAndCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
+    (void)a; // disable strange 'parameter set but not used' warning on gcc
+    __atomic_compare_exchange(a, &compare, &exchange, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+    return compare;
+}
+
+// Atomically ORs b into a (seq_cst) and returns the value after the operation.
+static inline intptr_t AtomicOr(TAtomic& a, intptr_t b) {
+    return __atomic_or_fetch(&a, b, __ATOMIC_SEQ_CST);
+}
+
+// Atomically XORs b into a (seq_cst) and returns the value after the operation.
+static inline intptr_t AtomicXor(TAtomic& a, intptr_t b) {
+    return __atomic_xor_fetch(&a, b, __ATOMIC_SEQ_CST);
+}
+
+// Atomically ANDs b into a (seq_cst) and returns the value after the operation.
+static inline intptr_t AtomicAnd(TAtomic& a, intptr_t b) {
+    return __atomic_and_fetch(&a, b, __ATOMIC_SEQ_CST);
+}
+
+// Issues a full memory barrier via the __sync_synchronize() builtin.
+static inline void AtomicBarrier() {
+    __sync_synchronize();
+}
--- a/contrib/lfalloc/src/util/system/atomic_ops.h
+++ b/contrib/lfalloc/src/util/system/atomic_ops.h
@ -0,0 +1,189 @@
+#pragma once
+
+#include <type_traits>
+
+// Reinterprets a pointer to a volatile T as a pointer to TAtomic.
+// No size or type check is performed here; the typed wrappers in this header decide when it is used.
+template <typename T>
+inline TAtomic* AsAtomicPtr(T volatile* target) {
+    return reinterpret_cast<TAtomic*>(target);
+}
+
+// Const overload: yields a pointer to const TAtomic.
+template <typename T>
+inline const TAtomic* AsAtomicPtr(T const volatile* target) {
+    return reinterpret_cast<const TAtomic*>(target);
+}
+
+// integral types
+
+// Compile-time trait: Castable is non-zero when T is an integral type,
+// has the same size as TAtomicBase, and is not const-qualified.
+template <typename T>
+struct TAtomicTraits {
+    enum {
+        Castable = std::is_integral<T>::value && sizeof(T) == sizeof(TAtomicBase) && !std::is_const<T>::value,
+    };
+};
+
+// Resolves to TT when TAtomicTraits<T>::Castable holds; otherwise the enable_if has no type,
+// which removes the declaring overload from overload resolution.
+template <typename T, typename TT>
+using TEnableIfCastable = std::enable_if_t<TAtomicTraits<T>::Castable, TT>;
+
+// Typed AtomicGet for castable integral T: reads `target` through the TAtomic overload
+// and casts the result back to T.
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicGet(T const volatile& target) {
+    return static_cast<T>(AtomicGet(*AsAtomicPtr(&target)));
+}
+
+// Typed AtomicSet for castable integral T: stores `value` into `target` through the TAtomic overload.
+template <typename T>
+inline TEnableIfCastable<T, void> AtomicSet(T volatile& target, TAtomicBase value) {
+    AtomicSet(*AsAtomicPtr(&target), value);
+}
+
+// Typed increment/decrement wrappers for castable integral T. Each one forwards to the
+// TAtomic overload of the same name and casts the returned value to T.
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicIncrement(T volatile& target) {
+    return static_cast<T>(AtomicIncrement(*AsAtomicPtr(&target)));
+}
+
+// Forwards to AtomicGetAndIncrement(TAtomic&).
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicGetAndIncrement(T volatile& target) {
+    return static_cast<T>(AtomicGetAndIncrement(*AsAtomicPtr(&target)));
+}
+
+// Forwards to AtomicDecrement(TAtomic&).
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicDecrement(T volatile& target) {
+    return static_cast<T>(AtomicDecrement(*AsAtomicPtr(&target)));
+}
+
+// Forwards to AtomicGetAndDecrement(TAtomic&).
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicGetAndDecrement(T volatile& target) {
+    return static_cast<T>(AtomicGetAndDecrement(*AsAtomicPtr(&target)));
+}
+
+// Typed add/subtract wrappers for castable integral T. Each one forwards to the
+// TAtomic overload of the same name and casts the returned value to T.
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicAdd(T volatile& target, TAtomicBase value) {
+    return static_cast<T>(AtomicAdd(*AsAtomicPtr(&target), value));
+}
+
+// Forwards to AtomicGetAndAdd(TAtomic&, TAtomicBase).
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicGetAndAdd(T volatile& target, TAtomicBase value) {
+    return static_cast<T>(AtomicGetAndAdd(*AsAtomicPtr(&target), value));
+}
+
+// Forwards to AtomicSub(TAtomic&, TAtomicBase).
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicSub(T volatile& target, TAtomicBase value) {
+    return static_cast<T>(AtomicSub(*AsAtomicPtr(&target), value));
+}
+
+// Forwards to AtomicGetAndSub(TAtomic&, TAtomicBase).
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicGetAndSub(T volatile& target, TAtomicBase value) {
+    return static_cast<T>(AtomicGetAndSub(*AsAtomicPtr(&target), value));
+}
+
+// Typed exchange for castable integral T: forwards to AtomicSwap(TAtomic*, ...) and casts the result to T.
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicSwap(T volatile* target, TAtomicBase exchange) {
+    return static_cast<T>(AtomicSwap(AsAtomicPtr(target), exchange));
+}
+
+// Typed compare-and-swap for castable integral T: forwards to AtomicCas(TAtomic*, ...) and returns its bool.
+template <typename T>
+inline TEnableIfCastable<T, bool> AtomicCas(T volatile* target, TAtomicBase exchange, TAtomicBase compare) {
+    return AtomicCas(AsAtomicPtr(target), exchange, compare);
+}
+
+// Typed variant of AtomicGetAndCas: forwards to the TAtomic overload and casts the returned value to T.
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicGetAndCas(T volatile* target, TAtomicBase exchange, TAtomicBase compare) {
+    return static_cast<T>(AtomicGetAndCas(AsAtomicPtr(target), exchange, compare));
+}
+
+// Typed lock-word helpers for castable integral T. Each one forwards to the TAtomic* overload of the same name.
+template <typename T>
+inline TEnableIfCastable<T, bool> AtomicTryLock(T volatile* target) {
+    return AtomicTryLock(AsAtomicPtr(target));
+}
+
+// Forwards to AtomicTryAndTryLock(TAtomic*).
+template <typename T>
+inline TEnableIfCastable<T, bool> AtomicTryAndTryLock(T volatile* target) {
+    return AtomicTryAndTryLock(AsAtomicPtr(target));
+}
+
+// Forwards to AtomicUnlock(TAtomic*).
+template <typename T>
+inline TEnableIfCastable<T, void> AtomicUnlock(T volatile* target) {
+    AtomicUnlock(AsAtomicPtr(target));
+}
+
+// Typed bitwise wrappers for castable integral T. Each one forwards to the
+// TAtomic overload of the same name and casts the returned value to T.
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicOr(T volatile& target, TAtomicBase value) {
+    return static_cast<T>(AtomicOr(*AsAtomicPtr(&target), value));
+}
+
+// Forwards to AtomicAnd(TAtomic&, TAtomicBase).
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicAnd(T volatile& target, TAtomicBase value) {
+    return static_cast<T>(AtomicAnd(*AsAtomicPtr(&target), value));
+}
+
+// Forwards to AtomicXor(TAtomic&, TAtomicBase).
+template <typename T>
+inline TEnableIfCastable<T, T> AtomicXor(T volatile& target, TAtomicBase value) {
+    return static_cast<T>(AtomicXor(*AsAtomicPtr(&target), value));
+}
+
+// pointer types
+
+// Pointer AtomicGet: reads the pointer cell through the TAtomic overload and converts
+// the integer result back to T*.
+template <typename T>
+inline T* AtomicGet(T* const volatile& target) {
+    return reinterpret_cast<T*>(AtomicGet(*AsAtomicPtr(&target)));
+}
+
+// Pointer AtomicSet: stores `value`, converted to TAtomicBase, into the pointer cell.
+template <typename T>
+inline void AtomicSet(T* volatile& target, T* value) {
+    AtomicSet(*AsAtomicPtr(&target), reinterpret_cast<TAtomicBase>(value));
+}
+
+// Type of the nullptr literal; used to provide overloads that accept a bare `nullptr`.
+using TNullPtr = decltype(nullptr);
+
+// Pointer AtomicSet overload for a literal nullptr: stores 0 into the pointer cell.
+template <typename T>
+inline void AtomicSet(T* volatile& target, TNullPtr) {
+    AtomicSet(*AsAtomicPtr(&target), 0);
+}
+
+// Pointer AtomicSwap: exchanges the pointer cell with `exchange` via the TAtomic overload
+// and returns the result converted back to T*.
+template <typename T>
+inline T* AtomicSwap(T* volatile* target, T* exchange) {
+    return reinterpret_cast<T*>(AtomicSwap(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange)));
+}
+
+// Overload for a literal nullptr: exchanges the pointer cell with 0.
+template <typename T>
+inline T* AtomicSwap(T* volatile* target, TNullPtr) {
+    return reinterpret_cast<T*>(AtomicSwap(AsAtomicPtr(target), 0));
+}
+
+// Pointer compare-and-swap overload set. Every overload converts its pointer arguments to
+// TAtomicBase (a literal nullptr becomes 0) and forwards to the TAtomic* overload.
+// AtomicCas returns the bool from the underlying call; AtomicGetAndCas returns the
+// underlying result converted back to T*.
+template <typename T>
+inline bool AtomicCas(T* volatile* target, T* exchange, T* compare) {
+    return AtomicCas(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange), reinterpret_cast<TAtomicBase>(compare));
+}
+
+// (T* exchange, T* compare) variant returning a pointer.
+template <typename T>
+inline T* AtomicGetAndCas(T* volatile* target, T* exchange, T* compare) {
+    return reinterpret_cast<T*>(AtomicGetAndCas(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange), reinterpret_cast<TAtomicBase>(compare)));
+}
+
+// `compare` is a literal nullptr: compares against 0.
+template <typename T>
+inline bool AtomicCas(T* volatile* target, T* exchange, TNullPtr) {
+    return AtomicCas(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange), 0);
+}
+
+// `compare` is a literal nullptr: compares against 0; returns a pointer.
+template <typename T>
+inline T* AtomicGetAndCas(T* volatile* target, T* exchange, TNullPtr) {
+    return reinterpret_cast<T*>(AtomicGetAndCas(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange), 0));
+}
+
+// `exchange` is a literal nullptr: stores 0 when the comparison succeeds.
+template <typename T>
+inline bool AtomicCas(T* volatile* target, TNullPtr, T* compare) {
+    return AtomicCas(AsAtomicPtr(target), 0, reinterpret_cast<TAtomicBase>(compare));
+}
+
+// `exchange` is a literal nullptr: stores 0 when the comparison succeeds; returns a pointer.
+template <typename T>
+inline T* AtomicGetAndCas(T* volatile* target, TNullPtr, T* compare) {
+    return reinterpret_cast<T*>(AtomicGetAndCas(AsAtomicPtr(target), 0, reinterpret_cast<TAtomicBase>(compare)));
+}
+
+// Both `exchange` and `compare` are literal nullptr: forwards (0, 0).
+template <typename T>
+inline bool AtomicCas(T* volatile* target, TNullPtr, TNullPtr) {
+    return AtomicCas(AsAtomicPtr(target), 0, 0);
+}
+
+// Both `exchange` and `compare` are literal nullptr: forwards (0, 0); returns a pointer.
+template <typename T>
+inline T* AtomicGetAndCas(T* volatile* target, TNullPtr, TNullPtr) {
+    return reinterpret_cast<T*>(AtomicGetAndCas(AsAtomicPtr(target), 0, 0));
+}
--- a/contrib/lfalloc/src/util/system/atomic_win.h
+++ b/contrib/lfalloc/src/util/system/atomic_win.h
@ -0,0 +1,114 @@
+#pragma once
+
+#include <intrin.h>
+
+#define USE_GENERIC_SETGET
+
+#if defined(_i386_)
+
+#pragma intrinsic(_InterlockedIncrement)
+#pragma intrinsic(_InterlockedDecrement)
+#pragma intrinsic(_InterlockedExchangeAdd)
+#pragma intrinsic(_InterlockedExchange)
+#pragma intrinsic(_InterlockedCompareExchange)
+
+// 32-bit build: increments a with _InterlockedIncrement and returns the intrinsic's result
+// (the incremented value).
+static inline intptr_t AtomicIncrement(TAtomic& a) {
+    return _InterlockedIncrement((volatile long*)&a);
+}
+
+// Increments a and returns the pre-increment value (intrinsic result minus 1).
+static inline intptr_t AtomicGetAndIncrement(TAtomic& a) {
+    return _InterlockedIncrement((volatile long*)&a) - 1;
+}
+
+// Decrements a with _InterlockedDecrement and returns the intrinsic's result (the decremented value).
+static inline intptr_t AtomicDecrement(TAtomic& a) {
+    return _InterlockedDecrement((volatile long*)&a);
+}
+
+// Decrements a and returns the pre-decrement value (intrinsic result plus 1).
+static inline intptr_t AtomicGetAndDecrement(TAtomic& a) {
+    return _InterlockedDecrement((volatile long*)&a) + 1;
+}
+
+// Adds b to a; _InterlockedExchangeAdd yields the previous value, so `+ b` produces the new value.
+static inline intptr_t AtomicAdd(TAtomic& a, intptr_t b) {
+    return _InterlockedExchangeAdd((volatile long*)&a, b) + b;
+}
+
+// Adds b to a and returns the intrinsic's result unchanged (the previous value).
+static inline intptr_t AtomicGetAndAdd(TAtomic& a, intptr_t b) {
+    return _InterlockedExchangeAdd((volatile long*)&a, b);
+}
+
+// Exchanges *a with b via _InterlockedExchange and returns the intrinsic's result (the previous value).
+static inline intptr_t AtomicSwap(TAtomic* a, intptr_t b) {
+    return _InterlockedExchange((volatile long*)a, b);
+}
+
+// Compare-and-swap: true when the value returned by _InterlockedCompareExchange
+// (the initial value of *a) equals `compare`, i.e. when the exchange took place.
+static inline bool AtomicCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
+    return _InterlockedCompareExchange((volatile long*)a, exchange, compare) == compare;
+}
+
+// Compare-and-swap returning the intrinsic's result (the initial value of *a).
+static inline intptr_t AtomicGetAndCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
+    return _InterlockedCompareExchange((volatile long*)a, exchange, compare);
+}
+
+#else // _x86_64_
+
+#pragma intrinsic(_InterlockedIncrement64)
+#pragma intrinsic(_InterlockedDecrement64)
+#pragma intrinsic(_InterlockedExchangeAdd64)
+#pragma intrinsic(_InterlockedExchange64)
+#pragma intrinsic(_InterlockedCompareExchange64)
+
+// 64-bit build: increments a with _InterlockedIncrement64 and returns the intrinsic's result
+// (the incremented value).
+static inline intptr_t AtomicIncrement(TAtomic& a) {
+    return _InterlockedIncrement64((volatile __int64*)&a);
+}
+
+// Increments a and returns the pre-increment value (intrinsic result minus 1).
+static inline intptr_t AtomicGetAndIncrement(TAtomic& a) {
+    return _InterlockedIncrement64((volatile __int64*)&a) - 1;
+}
+
+// Decrements a with _InterlockedDecrement64 and returns the intrinsic's result (the decremented value).
+static inline intptr_t AtomicDecrement(TAtomic& a) {
+    return _InterlockedDecrement64((volatile __int64*)&a);
+}
+
+// Decrements a and returns the pre-decrement value (intrinsic result plus 1).
+static inline intptr_t AtomicGetAndDecrement(TAtomic& a) {
+    return _InterlockedDecrement64((volatile __int64*)&a) + 1;
+}
+
+// Adds b to a; _InterlockedExchangeAdd64 yields the previous value, so `+ b` produces the new value.
+static inline intptr_t AtomicAdd(TAtomic& a, intptr_t b) {
+    return _InterlockedExchangeAdd64((volatile __int64*)&a, b) + b;
+}
+
+// Adds b to a and returns the intrinsic's result unchanged (the previous value).
+static inline intptr_t AtomicGetAndAdd(TAtomic& a, intptr_t b) {
+    return _InterlockedExchangeAdd64((volatile __int64*)&a, b);
+}
+
+// Exchanges *a with b via _InterlockedExchange64 and returns the intrinsic's result (the previous value).
+static inline intptr_t AtomicSwap(TAtomic* a, intptr_t b) {
+    return _InterlockedExchange64((volatile __int64*)a, b);
+}
+
+// Compare-and-swap: true when the value returned by _InterlockedCompareExchange64
+// (the initial value of *a) equals `compare`, i.e. when the exchange took place.
+static inline bool AtomicCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
+    return _InterlockedCompareExchange64((volatile __int64*)a, exchange, compare) == compare;
+}
+
+// Compare-and-swap returning the intrinsic's result (the initial value of *a).
+static inline intptr_t AtomicGetAndCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
+    return _InterlockedCompareExchange64((volatile __int64*)a, exchange, compare);
+}
+
+// ORs b into a. The intrinsic yields the previous value; applying `| b` again produces the new value.
+static inline intptr_t AtomicOr(TAtomic& a, intptr_t b) {
+    return _InterlockedOr64(&a, b) | b;
+}
+
+// ANDs b into a. The intrinsic yields the previous value; applying `& b` again produces the new value.
+static inline intptr_t AtomicAnd(TAtomic& a, intptr_t b) {
+    return _InterlockedAnd64(&a, b) & b;
+}
+
+// XORs b into a. The intrinsic yields the previous value; applying `^ b` again produces the new value.
+static inline intptr_t AtomicXor(TAtomic& a, intptr_t b) {
+    return _InterlockedXor64(&a, b) ^ b;
+}
+
+#endif // _x86_
+
+//TODO
+// Performs an interlocked exchange on a local dummy variable.
+// NOTE(review): presumably this relies on the interlocked intrinsic acting as a full memory
+// barrier; the TODO above suggests it is a placeholder — confirm before depending on it.
+static inline void AtomicBarrier() {
+    TAtomic val = 0;
+
+    AtomicSwap(&val, 0);
+}
--- a/contrib/lfalloc/src/util/system/compiler.h
+++ b/contrib/lfalloc/src/util/system/compiler.h
@ -0,0 +1,617 @@
+#pragma once
+
+// useful cross-platform definitions for compilers
+
+/**
+ * @def Y_FUNC_SIGNATURE
+ *
+ * Use this macro to get pretty function name (see example).
+ *
+ * @code
+ * void Hi() {
+ *     Cout << Y_FUNC_SIGNATURE << Endl;
+ * }
+
+ * template <typename T>
+ * void Do() {
+ *     Cout << Y_FUNC_SIGNATURE << Endl;
+ * }
+
+ * int main() {
+ *    Hi();         // void Hi()
+ *    Do<int>();    // void Do() [T = int]
+ *    Do<TString>(); // void Do() [T = TString]
+ * }
+ * @endcode
+ */
+#if defined(__GNUC__)
+#define Y_FUNC_SIGNATURE __PRETTY_FUNCTION__
+#elif defined(_MSC_VER)
+#define Y_FUNC_SIGNATURE __FUNCSIG__
+#else
+#define Y_FUNC_SIGNATURE ""
+#endif
+
+// Y_PRINTF_FORMAT(n, m): on GCC-compatible compilers expands to the printf `format` attribute
+// with arguments (n, m) — the format-string index and the first-checked-argument index.
+// Expands to nothing on other compilers.
+#ifdef __GNUC__
+#define Y_PRINTF_FORMAT(n, m) __attribute__((__format__(__printf__, n, m)))
+#endif
+
+#ifndef Y_PRINTF_FORMAT
+#define Y_PRINTF_FORMAT(n, m)
+#endif
+
+// Y_NO_SANITIZE(...): on Clang expands to the `no_sanitize` attribute with the given arguments;
+// expands to nothing on other compilers.
+#if defined(__clang__)
+#define Y_NO_SANITIZE(...) __attribute__((no_sanitize(__VA_ARGS__)))
+#endif
+
+#if !defined(Y_NO_SANITIZE)
+#define Y_NO_SANITIZE(...)
+#endif
+
+/**
+ * @def Y_DECLARE_UNUSED
+ *
+ * Macro is needed to silence compiler warning about unused entities (e.g. function or argument).
+ *
+ * @code
+ * Y_DECLARE_UNUSED int FunctionUsedSolelyForDebugPurposes();
+ * assert(FunctionUsedSolelyForDebugPurposes() == 42);
+ *
+ * void Foo(const int argumentUsedOnlyForDebugPurposes Y_DECLARE_UNUSED) {
+ *     assert(argumentUsedOnlyForDebugPurposes == 42);
+ *     // however you may as well omit `Y_DECLARE_UNUSED` and use `UNUSED` macro instead
+ *     Y_UNUSED(argumentUsedOnlyForDebugPurposes);
+ * }
+ * @endcode
+ */
+#ifdef __GNUC__
+#define Y_DECLARE_UNUSED __attribute__((unused))
+#endif
+
+#ifndef Y_DECLARE_UNUSED
+#define Y_DECLARE_UNUSED
+#endif
+
+// GCC-compatible branch and prefetch hints.
+// Y_LIKELY/Y_UNLIKELY normalize Cond to 0/1 with `!!` and pass the expected value to __builtin_expect.
+// Y_PREFETCH_READ/Y_PREFETCH_WRITE call __builtin_prefetch with rw = 0 / rw = 1 and Priority as the locality argument.
+#if defined(__GNUC__)
+#define Y_LIKELY(Cond) __builtin_expect(!!(Cond), 1)
+#define Y_UNLIKELY(Cond) __builtin_expect(!!(Cond), 0)
+#define Y_PREFETCH_READ(Pointer, Priority) __builtin_prefetch((const void*)(Pointer), 0, Priority)
+#define Y_PREFETCH_WRITE(Pointer, Priority) __builtin_prefetch((const void*)(Pointer), 1, Priority)
+#endif
+
+/**
+ * @def Y_FORCE_INLINE
+ *
+ * Macro to use in place of 'inline' in function declaration/definition to force
+ * it to be inlined.
+ */
+#if !defined(Y_FORCE_INLINE)
+#if defined(CLANG_COVERAGE)
+#/* excessive __always_inline__ might significantly slow down compilation of an instrumented unit */
+#define Y_FORCE_INLINE inline
+#elif defined(_MSC_VER)
+#define Y_FORCE_INLINE __forceinline
+#elif defined(__GNUC__)
+#/* Clang also defines __GNUC__ (as 4) */
+#define Y_FORCE_INLINE inline __attribute__((__always_inline__))
+#else
+#define Y_FORCE_INLINE inline
+#endif
+#endif
+
+/**
+ * @def Y_NO_INLINE
+ *
+ * Macro to use in place of 'inline' in function declaration/definition to
+ * prevent it from being inlined.
+ */
+#if !defined(Y_NO_INLINE)
+#if defined(_MSC_VER)
+#define Y_NO_INLINE __declspec(noinline)
+#elif defined(__GNUC__) || defined(__INTEL_COMPILER)
+#/* Clang also defines __GNUC__ (as 4) */
+#define Y_NO_INLINE __attribute__((__noinline__))
+#else
+#define Y_NO_INLINE
+#endif
+#endif
+
+//to cheat compiler about strict aliasing or similar problems
+// Y_FAKE_READ(X): empty asm statement that lists X as a memory input operand.
+// Y_FAKE_WRITE(X): empty asm statement that lists X as a memory output operand.
+// Both expand to nothing on compilers that do not define __GNUC__.
+#if defined(__GNUC__)
+#define Y_FAKE_READ(X)                  \
+    do {                                \
+        __asm__ __volatile__(""         \
+                             :          \
+                             : "m"(X)); \
+    } while (0)
+
+#define Y_FAKE_WRITE(X)                  \
+    do {                                 \
+        __asm__ __volatile__(""          \
+                             : "=m"(X)); \
+    } while (0)
+#endif
+
+#if !defined(Y_FAKE_READ)
+#define Y_FAKE_READ(X)
+#endif
+
+#if !defined(Y_FAKE_WRITE)
+#define Y_FAKE_WRITE(X)
+#endif
+
+// Portable fallbacks used when the hint macros were not defined above:
+// the prefetch macros only evaluate and discard their arguments,
+// and Y_LIKELY/Y_UNLIKELY reduce to the bare condition.
+#ifndef Y_PREFETCH_READ
+#define Y_PREFETCH_READ(Pointer, Priority) (void)(const void*)(Pointer), (void)Priority
+#endif
+
+#ifndef Y_PREFETCH_WRITE
+#define Y_PREFETCH_WRITE(Pointer, Priority) (void)(const void*)(Pointer), (void)Priority
+#endif
+
+#ifndef Y_LIKELY
+#define Y_LIKELY(Cond) (Cond)
+#define Y_UNLIKELY(Cond) (Cond)
+#endif
+
+// _packed: expands to the GCC `packed` attribute; expands to nothing on other compilers.
+#ifdef __GNUC__
+#define _packed __attribute__((packed))
+#else
+#define _packed
+#endif
+
+// Y_WARN_UNUSED_RESULT: expands to the GCC `warn_unused_result` attribute; nothing elsewhere.
+#if defined(__GNUC__)
+#define Y_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#endif
+
+#ifndef Y_WARN_UNUSED_RESULT
+#define Y_WARN_UNUSED_RESULT
+#endif
+
+// Y_HIDDEN: expands to the GCC attribute for hidden symbol visibility; nothing elsewhere.
+#if defined(__GNUC__)
+#define Y_HIDDEN __attribute__((visibility("hidden")))
+#endif
+
+#if !defined(Y_HIDDEN)
+#define Y_HIDDEN
+#endif
+
+// Y_PUBLIC: expands to the GCC attribute for default symbol visibility; nothing elsewhere.
+#if defined(__GNUC__)
+#define Y_PUBLIC __attribute__((visibility("default")))
+#endif
+
+#if !defined(Y_PUBLIC)
+#define Y_PUBLIC
+#endif
+
+// Y_UNUSED silences "unused" warnings for its argument(s).
+// C variant: a macro that evaluates `var` and discards the result.
+#if !defined(Y_UNUSED) && !defined(__cplusplus)
+#define Y_UNUSED(var) (void)(var)
+#endif
+// C++ variant: a constexpr function template that accepts any number of arguments of any
+// type, ignores them all and returns 0.
+#if !defined(Y_UNUSED) && defined(__cplusplus)
+template <class... Types>
+constexpr Y_FORCE_INLINE int Y_UNUSED(Types&&...) {
+    return 0;
+}
+#endif
+
+/**
+ * @def Y_ASSUME
+ *
+ * Macro that tells the compiler that it can generate optimized code
+ * as if the given expression will always evaluate true.
+ * The behavior is undefined if it ever evaluates false.
+ *
+ * @code
+ * // factored into a function so that it's testable
+ * inline int Avg(int x, int y) {
+ *     if (x >= 0 && y >= 0) {
+ *         return (static_cast<unsigned>(x) + static_cast<unsigned>(y)) >> 1;
+ *     } else {
+ *         // a slower implementation
+ *     }
+ * }
+ *
+ * // we know that xs and ys are non-negative from domain knowledge,
+ * // but we can't change the types of xs and ys because of API constraints
+ * int Foo(const TVector<int>& xs, const TVector<int>& ys) {
+ *     TVector<int> avgs;
+ *     avgs.resize(xs.size());
+ *     for (size_t i = 0; i < xs.size(); ++i) {
+ *         auto x = xs[i];
+ *         auto y = ys[i];
+ *         Y_ASSUME(x >= 0);
+ *         Y_ASSUME(y >= 0);
+ *         avgs[i] = Avg(x, y);
+ *     }
+ * }
+ * @endcode
+ */
+#if defined(__GNUC__)
+#define Y_ASSUME(condition) ((condition) ? (void)0 : __builtin_unreachable())
+#elif defined(_MSC_VER)
+#define Y_ASSUME(condition) __assume(condition)
+#else
+#define Y_ASSUME(condition) Y_UNUSED(condition)
+#endif
+
+// Declaration only; the definition is not part of this header.
+// Marked [[noreturn]] when compiled as C++, and given hidden visibility through Y_HIDDEN.
+#ifdef __cplusplus
+[[noreturn]]
+#endif
+Y_HIDDEN void _YandexAbort();
+
+/**
+ * @def Y_UNREACHABLE
+ *
+ * Macro that marks the rest of the code branch unreachable.
+ * The behavior is undefined if it's ever reached.
+ *
+ * @code
+ * switch (i % 3) {
+ * case 0:
+ *     return foo;
+ * case 1:
+ *     return bar;
+ * case 2:
+ *     return baz;
+ * default:
+ *     Y_UNREACHABLE();
+ * }
+ * @endcode
+ */
+#if defined(__GNUC__) || defined(_MSC_VER)
+#define Y_UNREACHABLE() Y_ASSUME(0)
+#else
+#define Y_UNREACHABLE() _YandexAbort()
+#endif
+
+// Sanitizer detection. Defines _ubsan_enabled_, _tsan_enabled_, _msan_enabled_ and _asan_enabled_
+// for the sanitizers found to be active, and _san_enabled_ when at least one of them is defined.
+// UBSan is recognised only through the externally supplied `undefined_sanitizer_enabled` macro.
+#if defined(undefined_sanitizer_enabled)
+#define _ubsan_enabled_
+#endif
+
+#ifdef __clang__
+
+// Clang: query the compiler directly with __has_feature.
+#if __has_feature(thread_sanitizer)
+#define _tsan_enabled_
+#endif
+#if __has_feature(memory_sanitizer)
+#define _msan_enabled_
+#endif
+#if __has_feature(address_sanitizer)
+#define _asan_enabled_
+#endif
+
+#else
+
+// Other compilers: rely on externally supplied *_sanitizer_enabled macros or the
+// __SANITIZE_THREAD__ / __SANITIZE_ADDRESS__ predefined macros.
+#if defined(thread_sanitizer_enabled) || defined(__SANITIZE_THREAD__)
+#define _tsan_enabled_
+#endif
+#if defined(memory_sanitizer_enabled)
+#define _msan_enabled_
+#endif
+#if defined(address_sanitizer_enabled) || defined(__SANITIZE_ADDRESS__)
+#define _asan_enabled_
+#endif
+
+#endif
+
+#if defined(_asan_enabled_) || defined(_msan_enabled_) || defined(_tsan_enabled_) || defined(_ubsan_enabled_)
+#define _san_enabled_
+#endif
+
+#if defined(_MSC_VER)
+#define __PRETTY_FUNCTION__ __FUNCSIG__
+#endif
+
+// Y_WEAK: expands to the GCC `weak` attribute; expands to nothing on other compilers.
+#if defined(__GNUC__)
+#define Y_WEAK __attribute__((weak))
+#else
+#define Y_WEAK
+#endif
+
+// Y_CUDA_AT_LEAST(x, y): true when compiling with a CUDA compiler whose version is at least x.y;
+// always 0 when __CUDACC_VER_MAJOR__ is not defined.
+#if defined(__CUDACC_VER_MAJOR__)
+#define Y_CUDA_AT_LEAST(x, y) (__CUDACC_VER_MAJOR__ > x || (__CUDACC_VER_MAJOR__ == x && __CUDACC_VER_MINOR__ >= y))
+#else
+#define Y_CUDA_AT_LEAST(x, y) 0
+#endif
+
+// NVidia CUDA C++ Compiler did not know about noexcept keyword until version 9.0
+// For older CUDA compilers `noexcept` is therefore redefined as an empty throw specification,
+// unless a `noexcept` macro already exists.
+#if !Y_CUDA_AT_LEAST(9, 0)
+#if defined(__CUDACC__) && !defined(noexcept)
+#define noexcept throw ()
+#endif
+#endif
+
+// Y_COLD / Y_LEAF / Y_WRAPPER: expand to the GCC `cold`, `leaf` and `artificial` function
+// attributes respectively; all three expand to nothing on other compilers.
+#if defined(__GNUC__)
+#define Y_COLD __attribute__((cold))
+#define Y_LEAF __attribute__((leaf))
+#define Y_WRAPPER __attribute__((artificial))
+#else
+#define Y_COLD
+#define Y_LEAF
+#define Y_WRAPPER
+#endif
+
+/**
+ * @def Y_PRAGMA
+ *
+ * Macro for use in other macros to define compiler pragma
+ * See below for other usage examples
+ *
+ * @code
+ * #if defined(__clang__) || defined(__GNUC__)
+ * #define Y_PRAGMA_NO_WSHADOW \
+ *     Y_PRAGMA("GCC diagnostic ignored \"-Wshadow\"")
+ * #elif defined(_MSC_VER)
+ * #define Y_PRAGMA_NO_WSHADOW \
+ *     Y_PRAGMA(warning(disable : 4456 4457))
+ * #else
+ * #define Y_PRAGMA_NO_WSHADOW
+ * #endif
+ * @endcode
+ */
+#if defined(__clang__) || defined(__GNUC__)
+#define Y_PRAGMA(x) _Pragma(x)
+#elif defined(_MSC_VER)
+#define Y_PRAGMA(x) __pragma(x)
+#else
+#define Y_PRAGMA(x)
+#endif
+
+/**
+ * @def Y_PRAGMA_DIAGNOSTIC_PUSH
+ *
+ * Cross-compiler pragma to save diagnostic settings
+ *
+ * @see
+ *     GCC: https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Pragmas.html
+ *     MSVC: https://msdn.microsoft.com/en-us/library/2c8f766e.aspx
+ *     Clang: https://clang.llvm.org/docs/UsersManual.html#controlling-diagnostics-via-pragmas
+ *
+ * @code
+ * Y_PRAGMA_DIAGNOSTIC_PUSH
+ * @endcode
+ */
+#if defined(__clang__) || defined(__GNUC__)
+#define Y_PRAGMA_DIAGNOSTIC_PUSH \
+    Y_PRAGMA("GCC diagnostic push")
+#elif defined(_MSC_VER)
+#define Y_PRAGMA_DIAGNOSTIC_PUSH \
+    Y_PRAGMA(warning(push))
+#else
+#define Y_PRAGMA_DIAGNOSTIC_PUSH
+#endif
+
+/**
+ * @def Y_PRAGMA_DIAGNOSTIC_POP
+ *
+ * Cross-compiler pragma to restore diagnostic settings
+ *
+ * @see
+ *     GCC: https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Pragmas.html
+ *     MSVC: https://msdn.microsoft.com/en-us/library/2c8f766e.aspx
+ *     Clang: https://clang.llvm.org/docs/UsersManual.html#controlling-diagnostics-via-pragmas
+ *
+ * @code
+ * Y_PRAGMA_DIAGNOSTIC_POP
+ * @endcode
+ */
+#if defined(__clang__) || defined(__GNUC__)
+#define Y_PRAGMA_DIAGNOSTIC_POP \
+    Y_PRAGMA("GCC diagnostic pop")
+#elif defined(_MSC_VER)
+#define Y_PRAGMA_DIAGNOSTIC_POP \
+    Y_PRAGMA(warning(pop))
+#else
+#define Y_PRAGMA_DIAGNOSTIC_POP
+#endif
+
+/**
+ * @def Y_PRAGMA_NO_WSHADOW
+ *
+ * Cross-compiler pragma to disable warnings about shadowing variables
+ *
+ * @code
+ * Y_PRAGMA_DIAGNOSTIC_PUSH
+ * Y_PRAGMA_NO_WSHADOW
+ *
+ * // some code which use variable shadowing, e.g.:
+ *
+ * for (int i = 0; i < 100; ++i) {
+ *   Use(i);
+ *
+ *   for (int i = 42; i < 100500; ++i) { // this i is shadowing previous i
+ *       AnotherUse(i);
+ *    }
+ * }
+ *
+ * Y_PRAGMA_DIAGNOSTIC_POP
+ * @endcode
+ */
+#if defined(__clang__) || defined(__GNUC__)
+#define Y_PRAGMA_NO_WSHADOW \
+    Y_PRAGMA("GCC diagnostic ignored \"-Wshadow\"")
+#elif defined(_MSC_VER)
+#define Y_PRAGMA_NO_WSHADOW \
+    Y_PRAGMA(warning(disable : 4456 4457))
+#else
+#define Y_PRAGMA_NO_WSHADOW
+#endif
+
+/**
+ * @def Y_PRAGMA_NO_UNUSED_FUNCTION
+ *
+ * Cross-compiler pragma to disable warnings about unused functions
+ *
+ * @see
+ *     GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
+ *     Clang: https://clang.llvm.org/docs/DiagnosticsReference.html#wunused-function
+ *     MSVC: there is no such warning
+ *
+ * @code
+ * Y_PRAGMA_DIAGNOSTIC_PUSH
+ * Y_PRAGMA_NO_UNUSED_FUNCTION
+ *
+ * // some code which introduces a function which later will not be used, e.g.:
+ *
+ * void Foo() {
+ * }
+ *
+ * int main() {
+ *     return 0; // Foo() never called
+ * }
+ *
+ * Y_PRAGMA_DIAGNOSTIC_POP
+ * @endcode
+ */
+#if defined(__clang__) || defined(__GNUC__)
+#define Y_PRAGMA_NO_UNUSED_FUNCTION \
+    Y_PRAGMA("GCC diagnostic ignored \"-Wunused-function\"")
+#else
+#define Y_PRAGMA_NO_UNUSED_FUNCTION
+#endif
+
+/**
+ * @def Y_PRAGMA_NO_UNUSED_PARAMETER
+ *
+ * Cross-compiler pragma to disable warnings about unused function parameters
+ *
+ * @see
+ *     GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
+ *     Clang: https://clang.llvm.org/docs/DiagnosticsReference.html#wunused-parameter
+ *     MSVC: https://msdn.microsoft.com/en-us/library/26kb9fy0.aspx
+ *
+ * @code
+ * Y_PRAGMA_DIAGNOSTIC_PUSH
+ * Y_PRAGMA_NO_UNUSED_PARAMETER
+ *
+ * // some code which introduces a function with unused parameter, e.g.:
+ *
+ * void foo(int a) {
+ *     // a is not referenced
+ * }
+ *
+ * int main() {
+ *     foo(1);
+ *     return 0;
+ * }
+ *
+ * Y_PRAGMA_DIAGNOSTIC_POP
+ * @endcode
+ */
+#if defined(__clang__) || defined(__GNUC__)
+#define Y_PRAGMA_NO_UNUSED_PARAMETER \
+    Y_PRAGMA("GCC diagnostic ignored \"-Wunused-parameter\"")
+#elif defined(_MSC_VER)
+#define Y_PRAGMA_NO_UNUSED_PARAMETER \
+    Y_PRAGMA(warning(disable : 4100))
+#else
+#define Y_PRAGMA_NO_UNUSED_PARAMETER
+#endif
+
+/**
+ * @def Y_PRAGMA_NO_DEPRECATED
+ *
+ * Cross compiler pragma to disable warnings and errors about deprecated
+ *
+ * @see
+ *     GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
+ *     Clang: https://clang.llvm.org/docs/DiagnosticsReference.html#wdeprecated
+ *     MSVC: https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-3-c4996?view=vs-2017
+ *
+ * @code
+ * Y_PRAGMA_DIAGNOSTIC_PUSH
+ * Y_PRAGMA_NO_DEPRECATED
+ *
+ * [deprecated] void foo() {
+ *     // ...
+ * }
+ *
+ * int main() {
+ *     foo();
+ *     return 0;
+ * }
+ *
+ * Y_PRAGMA_DIAGNOSTIC_POP
+ * @endcode
+ */
+#if defined(__clang__) || defined(__GNUC__)
+#define Y_PRAGMA_NO_DEPRECATED \
+    Y_PRAGMA("GCC diagnostic ignored \"-Wdeprecated\"")
+#elif defined(_MSC_VER)
+#define Y_PRAGMA_NO_DEPRECATED \
+    Y_PRAGMA(warning(disable : 4996))
+#else
+#define Y_PRAGMA_NO_DEPRECATED
+#endif
+
+#if defined(__clang__) || defined(__GNUC__)
+/**
+ * @def Y_CONST_FUNCTION
+   Methods and functions marked with this attribute promise that they:
+     1. have no side effects
+     2. do not read global memory
+   NOTE: this attribute can't be set for methods that depend on data pointed to by `this`.
+   This allows compilers to optimize such functions aggressively.
+   NOTE: in the common case this attribute can't be set if the method has pointer arguments.
+   NOTE: as a result, there is no reason to discard the result of such a method.
+*/
+#define Y_CONST_FUNCTION [[gnu::const]]
+#endif
+
+#if !defined(Y_CONST_FUNCTION)
+#define Y_CONST_FUNCTION
+#endif
+
+#if defined(__clang__) || defined(__GNUC__)
+/**
+ * @def Y_PURE_FUNCTION
+   Methods and functions marked with this attribute promise that:
+     1. they have no side effects
+     2. their result stays the same as long as no global memory has changed
+   This allows compilers to optimize such functions aggressively.
+   NOTE: as a result, there is no reason to discard the result of such a method.
+*/
+#define Y_PURE_FUNCTION [[gnu::pure]]
+#endif
+
+#if !defined(Y_PURE_FUNCTION)
+#define Y_PURE_FUNCTION
+#endif
+
+/**
+ * @def Y_HAVE_INT128
+ *
+ * Defined when the compiler supports __int128 extension
+ *
+ * @code
+ *
+ * #if defined(Y_HAVE_INT128)
+ *     __int128 myVeryBigInt = 12345678901234567890;
+ * #endif
+ *
+ * @endcode
+ */
+#if defined(__SIZEOF_INT128__)
+#define Y_HAVE_INT128 1
+#endif
+
+/**
+ * XRAY macro must be passed to compiler if XRay is enabled.
+ *
+ * Define everything XRay-specific as a macro so that it doesn't cause errors
+ * for compilers that don't support XRay.
+ */
+#if defined(XRAY) && defined(__cplusplus)
+#include <xray/xray_interface.h>
+#define Y_XRAY_ALWAYS_INSTRUMENT [[clang::xray_always_instrument]]
+#define Y_XRAY_NEVER_INSTRUMENT [[clang::xray_never_instrument]]
+#define Y_XRAY_CUSTOM_EVENT(__string, __length) \
+    do {                                        \
+        __xray_customevent(__string, __length); \
+    } while (0)
+#else
+#define Y_XRAY_ALWAYS_INSTRUMENT
+#define Y_XRAY_NEVER_INSTRUMENT
+#define Y_XRAY_CUSTOM_EVENT(__string, __length) \
+    do {                                        \
+    } while (0)
+#endif
--- a/contrib/lfalloc/src/util/system/defaults.h
+++ b/contrib/lfalloc/src/util/system/defaults.h
@ -0,0 +1,168 @@
+#pragma once
+
+#include "platform.h"
+
+#if defined _unix_
+#define LOCSLASH_C '/'
+#define LOCSLASH_S "/"
+#else
+#define LOCSLASH_C '\\'
+#define LOCSLASH_S "\\"
+#endif // _unix_
+
+#if defined(__INTEL_COMPILER) && defined(__cplusplus)
+#include <new>
+#endif
+
+// low and high parts of integers
+#if !defined(_win_)
+#include <sys/param.h>
+#endif
+
+#if defined(BSD) || defined(_android_)
+
+#if defined(BSD)
+#include <machine/endian.h>
+#endif
+
+#if defined(_android_)
+#include <endian.h>
+#endif
+
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+#define _little_endian_
+#elif (BYTE_ORDER == BIG_ENDIAN)
+#define _big_endian_
+#else
+#error unknown endian not supported
+#endif
+
+#elif (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(WHATEVER_THAT_HAS_BIG_ENDIAN)
+#define _big_endian_
+#else
+#define _little_endian_
+#endif
+
+// alignment
+#if (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(__alpha__) || defined(__ia64__) || defined(WHATEVER_THAT_NEEDS_ALIGNING_QUADS)
+#define _must_align8_
+#endif
+
+#if (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(__alpha__) || defined(__ia64__) || defined(WHATEVER_THAT_NEEDS_ALIGNING_LONGS)
+#define _must_align4_
+#endif
+
+#if (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(__alpha__) || defined(__ia64__) || defined(WHATEVER_THAT_NEEDS_ALIGNING_SHORTS)
+#define _must_align2_
+#endif
+
+#if defined(__GNUC__)
+#define alias_hack __attribute__((__may_alias__))
+#endif
+
+#ifndef alias_hack
+#define alias_hack
+#endif
+
+#include "types.h"
+
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+#define PRAGMA(x) _Pragma(#x)
+#define RCSID(idstr) PRAGMA(comment(exestr, idstr))
+#else
+#define RCSID(idstr) static const char rcsid[] = idstr
+#endif
+
+#include "compiler.h"
+
+#ifdef _win_
+#include <malloc.h>
+#elif defined(_sun_)
+#include <alloca.h>
+#endif
+
+#ifdef NDEBUG
+#define Y_IF_DEBUG(X)
+#else
+#define Y_IF_DEBUG(X) X
+#endif
+
+/**
+ * @def Y_ARRAY_SIZE
+ *
+ * This macro is needed to get number of elements in a statically allocated fixed size array. The
+ * expression is a compile-time constant and therefore can be used in compile time computations.
+ *
+ * @code
+ * enum ENumbers {
+ *     EN_ONE,
+ *     EN_TWO,
+ *     EN_SIZE
+ * };
+ *
+ * const char* NAMES[] = {
+ *     "one",
+ *     "two"
+ * };
+ *
+ * static_assert(Y_ARRAY_SIZE(NAMES) == EN_SIZE, "you should define a name in `NAMES` for each enumerator");
+ * @endcode
+ *
+ * This macro also catches type errors. If you see a compiler diagnostic like "warning: division by
+ * zero is undefined" when using `Y_ARRAY_SIZE` (C version), you are probably giving it a pointer.
+ *
+ * Since all of our code is expected to work on a 64-bit platform where pointers are 8 bytes, the C
+ * version may falsely accept pointers to types whose sizes are divisors of 8 (1, 2, 4 and 8).
+ */
+#if defined(__cplusplus)
+namespace NArraySizePrivate {
+    template <class T>
+    struct TArraySize; // primary template is intentionally left undefined: non-array types fail to compile
+
+    template <class T, size_t N>
+    struct TArraySize<T[N]> { // plain array type, e.g. decltype of an array variable
+        enum {
+            Result = N
+        };
+    };
+
+    template <class T, size_t N>
+    struct TArraySize<T (&)[N]> { // reference to an array, e.g. a parameter declared as T (&arr)[N]
+        enum {
+            Result = N
+        };
+    };
+}
+
+#define Y_ARRAY_SIZE(arr) ((size_t)::NArraySizePrivate::TArraySize<decltype(arr)>::Result)
+#else // plain C: no templates and no static_cast, so use sizeof arithmetic with a C-style cast
+#undef Y_ARRAY_SIZE
+#define Y_ARRAY_SIZE(arr) \
+    ((sizeof(arr) / sizeof((arr)[0])) / (size_t)(!(sizeof(arr) % sizeof((arr)[0]))))
+#endif
+
+#undef Y_ARRAY_BEGIN
+#define Y_ARRAY_BEGIN(arr) (arr)
+
+#undef Y_ARRAY_END
+#define Y_ARRAY_END(arr) ((arr) + Y_ARRAY_SIZE(arr))
+
+/**
+ * Concatenates two symbols, even if one of them is itself a macro.
+ */
+#define Y_CAT(X, Y) Y_CAT_I(X, Y)
+#define Y_CAT_I(X, Y) Y_CAT_II(X, Y)
+#define Y_CAT_II(X, Y) X##Y
+
+#define Y_STRINGIZE(X) UTIL_PRIVATE_STRINGIZE_AUX(X)
+#define UTIL_PRIVATE_STRINGIZE_AUX(X) #X
+
+#if defined(__COUNTER__)
+#define Y_GENERATE_UNIQUE_ID(N) Y_CAT(N, __COUNTER__)
+#endif
+
+#if !defined(Y_GENERATE_UNIQUE_ID)
+#define Y_GENERATE_UNIQUE_ID(N) Y_CAT(N, __LINE__)
+#endif
+
+#define NPOS ((size_t)-1)
--- a/contrib/lfalloc/src/util/system/platform.h
+++ b/contrib/lfalloc/src/util/system/platform.h
@ -0,0 +1,242 @@
+#pragma once
+
+// What OS ?
+// our definition has the form _{osname}_
+
+#if defined(_WIN64)
+#define _win64_
+#define _win32_
+#elif defined(__WIN32__) || defined(_WIN32) // _WIN32 is also defined by the 64-bit compiler for backward compatibility
+#define _win32_
+#else
+#define _unix_
+#if defined(__sun__) || defined(sun) || defined(sparc) || defined(__sparc)
+#define _sun_
+#endif
+#if defined(__hpux__)
+#define _hpux_
+#endif
+#if defined(__linux__)
+#define _linux_
+#endif
+#if defined(__FreeBSD__)
+#define _freebsd_
+#endif
+#if defined(__CYGWIN__)
+#define _cygwin_
+#endif
+#if defined(__APPLE__)
+#define _darwin_
+#endif
+#if defined(__ANDROID__)
+#define _android_
+#endif
+#endif
+
+#if defined(__IOS__)
+#define _ios_
+#endif
+
+#if defined(_linux_)
+#if defined(_musl_)
+//nothing to do
+#elif defined(_android_)
+#define _bionic_
+#else
+#define _glibc_
+#endif
+#endif
+
+#if defined(_darwin_)
+#define unix
+#define __unix__
+#endif
+
+#if defined(_win32_) || defined(_win64_)
+#define _win_
+#endif
+
+#if defined(__arm__) || defined(__ARM__) || defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM)
+#if defined(__arm64) || defined(__arm64__) || defined(__aarch64__)
+#define _arm64_
+#else
+#define _arm32_
+#endif
+#endif
+
+#if defined(_arm64_) || defined(_arm32_)
+#define _arm_
+#endif
+
+/* __ia64__ and __x86_64__      - defined by GNU C.
+ * _M_IA64, _M_X64, _M_AMD64    - defined by Visual Studio.
+ *
+ * Microsoft can define _M_IX86, _M_AMD64 (before Visual Studio 8)
+ * or _M_X64 (starting in Visual Studio 8).
+ */
+#if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64)
+#define _x86_64_
+#endif
+
+#if defined(__i386__) || defined(_M_IX86)
+#define _i386_
+#endif
+
+#if defined(__ia64__) || defined(_M_IA64)
+#define _ia64_
+#endif
+
+#if defined(__powerpc__)
+#define _ppc_
+#endif
+
+#if defined(__powerpc64__)
+#define _ppc64_
+#endif
+
+#if !defined(sparc) && !defined(__sparc) && !defined(__hpux__) && !defined(__alpha__) && !defined(_ia64_) && !defined(_x86_64_) && !defined(_arm_) && !defined(_i386_) && !defined(_ppc_) && !defined(_ppc64_)
+#error "platform not defined, please, define one"
+#endif
+
+#if defined(_x86_64_) || defined(_i386_)
+#define _x86_
+#endif
+
+#if defined(__MIC__)
+#define _mic_
+#define _k1om_
+#endif
+
+// stdio or MessageBox
+#if defined(__CONSOLE__) || defined(_CONSOLE)
+#define _console_
+#endif
+#if (defined(_win_) && !defined(_console_))
+#define _windows_
+#elif !defined(_console_)
+#define _console_
+#endif
+
+#if defined(__SSE__) || defined(SSE_ENABLED)
+#define _sse_
+#endif
+
+#if defined(__SSE2__) || defined(SSE2_ENABLED)
+#define _sse2_
+#endif
+
+#if defined(__SSE3__) || defined(SSE3_ENABLED)
+#define _sse3_
+#endif
+
+#if defined(__SSSE3__) || defined(SSSE3_ENABLED)
+#define _ssse3_
+#endif
+
+#if defined(POPCNT_ENABLED)
+#define _popcnt_
+#endif
+
+#if defined(__DLL__) || defined(_DLL)
+#define _dll_
+#endif
+
+// 16, 32 or 64
+#if defined(__sparc_v9__) || defined(_x86_64_) || defined(_ia64_) || defined(_arm64_) || defined(_ppc64_)
+#define _64_
+#else
+#define _32_
+#endif
+
+/* All modern 64-bit Unix systems use scheme LP64 (long, pointers are 64-bit).
+ * Microsoft uses a different scheme: LLP64 (long long, pointers are 64-bit).
+ *
+ * Scheme          LP64   LLP64
+ * char              8      8
+ * short            16     16
+ * int              32     32
+ * long             64     32
+ * long long        64     64
+ * pointer          64     64
+ */
+
+#if defined(_32_)
+#define SIZEOF_PTR 4
+#elif defined(_64_)
+#define SIZEOF_PTR 8
+#endif
+
+#define PLATFORM_DATA_ALIGN SIZEOF_PTR
+
+#if !defined(SIZEOF_PTR)
+#error todo
+#endif
+
+#define SIZEOF_CHAR 1
+#define SIZEOF_UNSIGNED_CHAR 1
+#define SIZEOF_SHORT 2
+#define SIZEOF_UNSIGNED_SHORT 2
+#define SIZEOF_INT 4
+#define SIZEOF_UNSIGNED_INT 4
+
+#if defined(_32_)
+#define SIZEOF_LONG 4
+#define SIZEOF_UNSIGNED_LONG 4
+#elif defined(_64_)
+#if defined(_win_)
+#define SIZEOF_LONG 4
+#define SIZEOF_UNSIGNED_LONG 4
+#else
+#define SIZEOF_LONG 8
+#define SIZEOF_UNSIGNED_LONG 8
+#endif // _win_
+#endif // _32_
+
+#if !defined(SIZEOF_LONG)
+#error todo
+#endif
+
+#define SIZEOF_LONG_LONG 8
+#define SIZEOF_UNSIGNED_LONG_LONG 8
+
+#undef SIZEOF_SIZE_T // in case we include <Python.h> which defines it, too
+#define SIZEOF_SIZE_T SIZEOF_PTR
+
+#if defined(__INTEL_COMPILER)
+#pragma warning(disable 1292)
+#pragma warning(disable 1469)
+#pragma warning(disable 193)
+#pragma warning(disable 271)
+#pragma warning(disable 383)
+#pragma warning(disable 424)
+#pragma warning(disable 444)
+#pragma warning(disable 584)
+#pragma warning(disable 593)
+#pragma warning(disable 981)
+#pragma warning(disable 1418)
+#pragma warning(disable 304)
+#pragma warning(disable 810)
+#pragma warning(disable 1029)
+#pragma warning(disable 1419)
+#pragma warning(disable 177)
+#pragma warning(disable 522)
+#pragma warning(disable 858)
+#pragma warning(disable 111)
+#pragma warning(disable 1599)
+#pragma warning(disable 411)
+#pragma warning(disable 304)
+#pragma warning(disable 858)
+#pragma warning(disable 444)
+#pragma warning(disable 913)
+#pragma warning(disable 310)
+#pragma warning(disable 167)
+#pragma warning(disable 180)
+#pragma warning(disable 1572)
+#endif
+
+#if defined(_MSC_VER)
+#undef _WINSOCKAPI_
+#define _WINSOCKAPI_
+#undef NOMINMAX
+#define NOMINMAX
+#endif
--- a/contrib/lfalloc/src/util/system/types.h
+++ b/contrib/lfalloc/src/util/system/types.h
@ -0,0 +1,117 @@
+#pragma once
+
+// DO_NOT_STYLE
+
+#include "platform.h"
+
+#include <inttypes.h>
+
+typedef int8_t i8;
+typedef int16_t i16;
+typedef uint8_t ui8;
+typedef uint16_t ui16;
+
+typedef int yssize_t;
+#define PRIYSZT "d"
+
+#if defined(_darwin_) && defined(_32_)
+typedef unsigned long ui32;
+typedef long i32;
+#else
+typedef uint32_t ui32;
+typedef int32_t i32;
+#endif
+
+#if defined(_darwin_) && defined(_64_)
+typedef unsigned long ui64;
+typedef long i64;
+#else
+typedef uint64_t ui64;
+typedef int64_t i64;
+#endif
+
+#define LL(number) INT64_C(number)
+#define ULL(number) UINT64_C(number)
+
+// Macro for size_t and ptrdiff_t types
+#if defined(_32_)
+#   if defined(_darwin_)
+#       define PRISZT "lu"
+#       undef PRIi32
+#       define PRIi32 "li"
+#       undef SCNi32
+#       define SCNi32 "li"
+#       undef PRId32
+#       define PRId32 "li"
+#       undef SCNd32
+#       define SCNd32 "li"
+#       undef PRIu32
+#       define PRIu32 "lu"
+#       undef SCNu32
+#       define SCNu32 "lu"
+#       undef PRIx32
+#       define PRIx32 "lx"
+#       undef SCNx32
+#       define SCNx32 "lx"
+#   elif !defined(_cygwin_)
+#       define PRISZT PRIu32
+#   else
+#       define PRISZT "u"
+#   endif
+#   define SCNSZT SCNu32
+#   define PRIPDT PRIi32
+#   define SCNPDT SCNi32
+#   define PRITMT PRIi32
+#   define SCNTMT SCNi32
+#elif defined(_64_)
+#   if defined(_darwin_)
+#       define PRISZT "lu"
+#       undef PRIu64
+#       define PRIu64 PRISZT
+#       undef PRIx64
+#       define PRIx64 "lx"
+#       undef PRIX64
+#       define PRIX64 "lX"
+#       undef PRId64
+#       define PRId64 "ld"
+#       undef PRIi64
+#       define PRIi64 "li"
+#       undef SCNi64
+#       define SCNi64 "li"
+#       undef SCNu64
+#       define SCNu64 "lu"
+#       undef SCNx64
+#       define SCNx64 "lx"
+#   else
+#       define PRISZT PRIu64
+#   endif
+#   define SCNSZT SCNu64
+#   define PRIPDT PRIi64
+#   define SCNPDT SCNi64
+#   define PRITMT PRIi64
+#   define SCNTMT SCNi64
+#else
+#   error "Unsupported platform"
+#endif
+
+// SUPERLONG
+#if !defined(DONT_USE_SUPERLONG) && !defined(SUPERLONG_MAX)
+#define SUPERLONG_MAX ~LL(0)
+typedef i64 SUPERLONG;
+#endif
+
+// UNICODE
+// UCS-2, native byteorder
+typedef ui16 wchar16;
+// internal symbol type: UTF-16LE
+typedef wchar16 TChar;
+typedef ui32 wchar32;
+
+#if defined(_MSC_VER)
+#include <basetsd.h>
+typedef SSIZE_T ssize_t;
+#define HAVE_SSIZE_T 1
+#include <wchar.h>
+#endif
+
+#include <sys/types.h>
--- a/contrib/poco
+++ b/contrib/poco
@ -1 +1 @@
-Subproject commit fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f
+Subproject commit 29439cf7fa32c1a2d62d925bb6d6a3f14668a4a2
--- a/dbms/CMakeLists.txt
+++ b/dbms/CMakeLists.txt
@ -20,7 +20,7 @@ set (CONFIG_VERSION ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config_version.h)
 set (CONFIG_COMMON ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config.h)

 include (cmake/version.cmake)
-message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION}")
+message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION} ${VERSION_OFFICIAL}")
 configure_file (src/Common/config.h.in ${CONFIG_COMMON})
 configure_file (src/Common/config_version.h.in ${CONFIG_VERSION})

@ -155,7 +155,6 @@ if (USE_EMBEDDED_COMPILER)
    target_include_directories (dbms SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS})
 endif ()

-
 if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
    # Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size.
    set_source_files_properties(
@ -214,6 +213,10 @@ target_link_libraries (clickhouse_common_io

 target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR})

+if (USE_LFALLOC)
+    target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${LFALLOC_INCLUDE_DIR})
+endif ()
+
 if(CPUID_LIBRARY)
    target_link_libraries(clickhouse_common_io PRIVATE ${CPUID_LIBRARY})
 endif()
--- a/dbms/cmake/version.cmake
+++ b/dbms/cmake/version.cmake
@ -1,11 +1,11 @@
 # This strings autochanged from release_lib.sh:
-set(VERSION_REVISION 54417)
+set(VERSION_REVISION 54418)
 set(VERSION_MAJOR 19)
-set(VERSION_MINOR 5)
+set(VERSION_MINOR 6)
 set(VERSION_PATCH 1)
-set(VERSION_GITHASH 628ed349c335b79a441a1bd6e4bc791d61dfe62c)
-set(VERSION_DESCRIBE v19.5.1.1-testing)
-set(VERSION_STRING 19.5.1.1)
+set(VERSION_GITHASH 30d3496c36cf3945c9828ac0b7cf7d1774a9f845)
+set(VERSION_DESCRIBE v19.6.1.1-testing)
+set(VERSION_STRING 19.6.1.1)
 # end of autochange

 set(VERSION_EXTRA "" CACHE STRING "")
@ -24,3 +24,7 @@ set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}")
 set (VERSION_SO "${VERSION_STRING}")

 math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000")
+
+if(YANDEX_OFFICIAL_BUILD)
+    set(VERSION_OFFICIAL " (official build)")
+endif()
--- a/dbms/programs/client/Client.cpp
+++ b/dbms/programs/client/Client.cpp
@ -1523,7 +1523,7 @@ private:

    void showClientVersion()
    {
-        std::cout << DBMS_NAME << " client version " << VERSION_STRING << "." << std::endl;
+        std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl;
    }

 public:
--- a/dbms/programs/copier/ClusterCopier.cpp
+++ b/dbms/programs/copier/ClusterCopier.cpp
@ -1,7 +1,6 @@
 #include "ClusterCopier.h"

 #include <chrono>
-
 #include <Poco/Util/XMLConfiguration.h>
 #include <Poco/Logger.h>
 #include <Poco/ConsoleChannel.h>
@ -13,14 +12,11 @@
 #include <Poco/FileChannel.h>
 #include <Poco/SplitterChannel.h>
 #include <Poco/Util/HelpFormatter.h>
-
 #include <boost/algorithm/string.hpp>
 #include <pcg_random.hpp>
-
 #include <common/logger_useful.h>
 #include <Common/ThreadPool.h>
 #include <daemon/OwnPatternFormatter.h>
-
 #include <Common/Exception.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Common/ZooKeeper/KeeperException.h>
@ -61,6 +57,7 @@
 #include <DataStreams/NullBlockOutputStream.h>
 #include <IO/Operators.h>
 #include <IO/ReadBufferFromString.h>
+#include <IO/ReadBufferFromFile.h>
 #include <Functions/registerFunctions.h>
 #include <TableFunctions/registerTableFunctions.h>
 #include <AggregateFunctions/registerAggregateFunctions.h>
@ -500,9 +497,6 @@ static ASTPtr extractPartitionKey(const ASTPtr & storage_ast)
                        ErrorCodes::BAD_ARGUMENTS);
    }

-    ASTPtr arguments_ast = engine.arguments->clone();
-    ASTs & arguments = arguments_ast->children;
-
    if (isExtendedDefinitionStorage(storage_ast))
    {
        if (storage.partition_by)
@ -516,6 +510,12 @@ static ASTPtr extractPartitionKey(const ASTPtr & storage_ast)
        bool is_replicated = startsWith(engine.name, "Replicated");
        size_t min_args = is_replicated ? 3 : 1;

+        if (!engine.arguments)
+            throw Exception("Expected arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS);
+
+        ASTPtr arguments_ast = engine.arguments->clone();
+        ASTs & arguments = arguments_ast->children;
+
        if (arguments.size() < min_args)
            throw Exception("Expected at least " + toString(min_args) + " arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS);

@ -894,6 +894,28 @@ public:
        }
    }

+    /// Reads the whole file `task_file` and stores its contents in the ZooKeeper node `<task_path>/description`.
+    /// An empty file is ignored. If creating the node fails, it is overwritten only when `force` is set.
+    void uploadTaskDescription(const std::string & task_path, const std::string & task_file, const bool force)
+    {
+        const auto description_node = task_path + "/description";
+        String description;
+        {
+            ReadBufferFromFile file_in(task_file);
+            readStringUntilEOF(description, file_in);
+        }
+        if (description.empty())
+            return;
+
+        auto zk = context.getZooKeeper();
+        zk->createAncestors(description_node);
+        const auto create_code = zk->tryCreate(description_node, description, zkutil::CreateMode::Persistent);
+        const bool skipped = create_code && !force;   /// creation failed and overwriting was not requested
+        if (create_code && force)
+            zk->createOrUpdate(description_node, description, zkutil::CreateMode::Persistent);
+        LOG_DEBUG(log, "Task description " << (skipped ? "not " : "") << "uploaded to " << description_node << " with result " << create_code << " ("<< zk->error2string(create_code) << ")");
+    }
+
    void reloadTaskDescription()
    {
        auto zookeeper = context.getZooKeeper();
@ -2104,6 +2126,10 @@ void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options)

    options.addOption(Poco::Util::Option("task-path", "", "path to task in ZooKeeper")
                          .argument("task-path").binding("task-path"));
+    options.addOption(Poco::Util::Option("task-file", "", "path to task file for uploading in ZooKeeper to task-path")
+                          .argument("task-file").binding("task-file"));
+    options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists")
+                          .argument("task-upload-force").binding("task-upload-force"));
    options.addOption(Poco::Util::Option("safe-mode", "", "disables ALTER DROP PARTITION in case of errors")
                          .binding("safe-mode"));
    options.addOption(Poco::Util::Option("copy-fault-probability", "", "the copying fails with specified probability (used to test partition state recovering)")
@ -2154,6 +2180,11 @@ void ClusterCopierApp::mainImpl()
    auto copier = std::make_unique<ClusterCopier>(task_path, host_id, default_database, *context);
    copier->setSafeMode(is_safe_mode);
    copier->setCopyFaultProbability(copy_fault_probability);
+
+    auto task_file = config().getString("task-file", "");
+    if (!task_file.empty())
+        copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false));
+
    copier->init();
    copier->process();
 }
--- a/dbms/programs/local/LocalServer.cpp
+++ b/dbms/programs/local/LocalServer.cpp
@ -369,7 +369,7 @@ void LocalServer::setupUsers()

 static void showClientVersion()
 {
-    std::cout << DBMS_NAME << " client version " << VERSION_STRING << "." << '\n';
+    std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << '\n';
 }

 std::string LocalServer::getHelpHeader() const
--- a/dbms/programs/server/HTTPHandler.cpp
+++ b/dbms/programs/server/HTTPHandler.cpp
@ -296,7 +296,7 @@ void HTTPHandler::processQuery(
    /// The client can pass a HTTP header indicating supported compression method (gzip or deflate).
    String http_response_compression_methods = request.get("Accept-Encoding", "");
    bool client_supports_http_compression = false;
-    ZlibCompressionMethod http_response_compression_method {};
+    CompressionMethod http_response_compression_method {};

    if (!http_response_compression_methods.empty())
    {
@ -305,12 +305,17 @@ void HTTPHandler::processQuery(
        if (std::string::npos != http_response_compression_methods.find("gzip"))
        {
            client_supports_http_compression = true;
-            http_response_compression_method = ZlibCompressionMethod::Gzip;
+            http_response_compression_method = CompressionMethod::Gzip;
        }
        else if (std::string::npos != http_response_compression_methods.find("deflate"))
        {
            client_supports_http_compression = true;
-            http_response_compression_method = ZlibCompressionMethod::Zlib;
+            http_response_compression_method = CompressionMethod::Zlib;
+        }
+        else if (http_response_compression_methods == "br")
+        {
+            client_supports_http_compression = true;
+            http_response_compression_method = CompressionMethod::Brotli;
        }
    }

@ -394,11 +399,11 @@ void HTTPHandler::processQuery(
    {
        if (http_request_compression_method_str == "gzip")
        {
-            in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, ZlibCompressionMethod::Gzip);
+            in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, CompressionMethod::Gzip);
        }
        else if (http_request_compression_method_str == "deflate")
        {
-            in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, ZlibCompressionMethod::Zlib);
+            in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, CompressionMethod::Zlib);
        }
 #if USE_BROTLI
        else if (http_request_compression_method_str == "br")
@ -606,7 +611,7 @@ void HTTPHandler::processQuery(

    executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context,
        [&response] (const String & content_type) { response.setContentType(content_type); },
-        [&response] (const String & current_query_id) { response.add("Query-Id", current_query_id); });
+        [&response] (const String & current_query_id) { response.add("X-ClickHouse-Query-Id", current_query_id); });

    if (used_output.hasDelayed())
    {
--- a/dbms/programs/server/Server.cpp
+++ b/dbms/programs/server/Server.cpp
@ -132,7 +132,7 @@ int Server::run()
    }
    if (config().hasOption("version"))
    {
-        std::cout << DBMS_NAME << " server version " << VERSION_STRING << "." << std::endl;
+        std::cout << DBMS_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl;
        return 0;
    }
    return Application::run();
--- a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp
+++ b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp
@ -0,0 +1,85 @@
+#include <AggregateFunctions/AggregateFunctionLeastSqr.h>
+
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/FactoryHelpers.h>
+
+
+namespace DB
+{
+
+namespace
+{
+
+/// Creates the `leastSqr` aggregate function for the given pair of argument types.
+///
+/// assertNoParameters / assertBinary run first (presumably rejecting any parameters and a
+/// non-binary argument list); then each supported (x, y) type pair is tried in turn.
+/// Throws ILLEGAL_TYPE_OF_ARGUMENT when no supported pair of native numeric types matches.
+AggregateFunctionPtr createAggregateFunctionLeastSqr(
+    const String & name, const DataTypes & arguments, const Array & params)
+{
+    assertNoParameters(name, params);
+    assertBinary(name, arguments);
+
+    /// Borrowed pointers: used for the type dispatch below and for the error message.
+    const IDataType * x_arg = arguments.front().get();
+    const IDataType * y_arg = arguments.back().get();
+
+    WhichDataType which_x{x_arg};
+    WhichDataType which_y{y_arg};
+
+    /// M(T, U) for a fixed first type T and each supported second type U.
+    #define FOR_LEASTSQR_TYPES_2(M, T) \
+        M(T, UInt8) \
+        M(T, UInt16) \
+        M(T, UInt32) \
+        M(T, UInt64) \
+        M(T, Int8) \
+        M(T, Int16) \
+        M(T, Int32) \
+        M(T, Int64) \
+        M(T, Float32) \
+        M(T, Float64)
+
+    /// M(T, U) for every supported combination of first type T and second type U.
+    #define FOR_LEASTSQR_TYPES(M) \
+        FOR_LEASTSQR_TYPES_2(M, UInt8) \
+        FOR_LEASTSQR_TYPES_2(M, UInt16) \
+        FOR_LEASTSQR_TYPES_2(M, UInt32) \
+        FOR_LEASTSQR_TYPES_2(M, UInt64) \
+        FOR_LEASTSQR_TYPES_2(M, Int8) \
+        FOR_LEASTSQR_TYPES_2(M, Int16) \
+        FOR_LEASTSQR_TYPES_2(M, Int32) \
+        FOR_LEASTSQR_TYPES_2(M, Int64) \
+        FOR_LEASTSQR_TYPES_2(M, Float32) \
+        FOR_LEASTSQR_TYPES_2(M, Float64)
+
+    /// Returns the matching instantiation as soon as both runtime type indices equal (T1, T2).
+    #define DISPATCH(T1, T2) \
+        if (which_x.idx == TypeIndex::T1 && which_y.idx == TypeIndex::T2) \
+            return std::make_shared<AggregateFunctionLeastSqr<T1, T2>>(arguments, params);
+
+    /// Expands to one check per type pair (10 x 10 combinations).
+    FOR_LEASTSQR_TYPES(DISPATCH)
+
+    #undef DISPATCH
+    #undef FOR_LEASTSQR_TYPES
+    #undef FOR_LEASTSQR_TYPES_2
+
+    /// No supported type pair matched.
+    const String type_list = x_arg->getName() + ", " + y_arg->getName();
+    throw Exception(
+        "Illegal types (" + type_list + ") of arguments of aggregate function " + name
+            + ", must be Native Ints, Native UInts or Floats",
+        ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
+    );
+}
+
+}
+
+void registerAggregateFunctionLeastSqr(AggregateFunctionFactory & factory)
+{
+    factory.registerFunction("leastSqr", createAggregateFunctionLeastSqr); /// registers the creator function under the name "leastSqr"
+}
+
+}
--- a/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h
@ -0,0 +1,195 @@
+#pragma once
+
+#include <AggregateFunctions/IAggregateFunction.h>
+#include <Columns/ColumnVector.h>
+#include <Columns/ColumnTuple.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <limits>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+template <typename X, typename Y, typename Ret>
+struct AggregateFunctionLeastSqrData final
+{
+    size_t count = 0;   /// number of accumulated (x, y) points
+    Ret sum_x = 0;      /// sum of x
+    Ret sum_y = 0;      /// sum of y
+    Ret sum_xx = 0;     /// sum of x * x
+    Ret sum_xy = 0;     /// sum of x * y
+
+    void add(X x, Y y)
+    {
+        count += 1;
+        sum_x += x;
+        sum_y += y;
+        sum_xx += static_cast<Ret>(x) * static_cast<Ret>(x); /// multiply in Ret: x * x evaluated in type X can overflow
+        sum_xy += static_cast<Ret>(x) * static_cast<Ret>(y); /// multiply in Ret: x * y evaluated in integer types can overflow
+    }
+
+    void merge(const AggregateFunctionLeastSqrData & other)
+    {
+        count += other.count;
+        sum_x += other.sum_x;
+        sum_y += other.sum_y;
+        sum_xx += other.sum_xx;
+        sum_xy += other.sum_xy;
+    }
+
+    void serialize(WriteBuffer & buf) const
+    {
+        writeBinary(count, buf); /// field order here must match deserialize()
+        writeBinary(sum_x, buf);
+        writeBinary(sum_y, buf);
+        writeBinary(sum_xx, buf);
+        writeBinary(sum_xy, buf);
+    }
+
+    void deserialize(ReadBuffer & buf)
+    {
+        readBinary(count, buf); /// field order here must match serialize()
+        readBinary(sum_x, buf);
+        readBinary(sum_y, buf);
+        readBinary(sum_xx, buf);
+        readBinary(sum_xy, buf);
+    }
+
+    /// Slope k of the fitted line; NaN when count * sum_xx - sum_x^2 is zero (e.g. no points were added).
+    Ret getK() const
+    {
+        Ret divisor = sum_xx * count - sum_x * sum_x;
+        if (divisor == 0)
+            return std::numeric_limits<Ret>::quiet_NaN();
+
+        return (sum_xy * count - sum_x * sum_y) / divisor;
+    }
+
+    /// Intercept b of the fitted line for the given slope k; NaN when no points were added.
+    Ret getB(Ret k) const
+    {
+        if (count == 0)
+            return std::numeric_limits<Ret>::quiet_NaN();
+        return (sum_y - k * sum_x) / count;
+    }
+};
+
+/// Aggregate function `leastSqr(x, y)`: simple linear regression over the aggregated points.
+/// The result is the tuple (k, b) of the line y = k * x + b fitted by the least squares method.
+///
+/// The per-group state is AggregateFunctionLeastSqrData, which keeps running sums.
+///
+/// Template parameters:
+///   X, Y - element types of the first and the second argument column;
+///   Ret  - type of the accumulated sums and of both result values (Float64 by default).
+template <typename X, typename Y, typename Ret = Float64>
+class AggregateFunctionLeastSqr final : public IAggregateFunctionDataHelper<
+    AggregateFunctionLeastSqrData<X, Y, Ret>,
+    AggregateFunctionLeastSqr<X, Y, Ret>
+>
+{
+private:
+    /// Shorthand for the direct base class, needed only to spell the constructor call.
+    using Base = IAggregateFunctionDataHelper<
+        AggregateFunctionLeastSqrData<X, Y, Ret>,
+        AggregateFunctionLeastSqr<X, Y, Ret>>;
+
+public:
+    /// Passes the argument types and parameters through to the base helper.
+    /// Nothing is validated here: the arguments have already been checked by the caller.
+    AggregateFunctionLeastSqr(const DataTypes & arguments, const Array & params)
+        : Base {arguments, params}
+    {
+    }
+
+    /// Name of the aggregate function.
+    String getName() const override
+    {
+        return "leastSqr";
+    }
+
+    /// Path of this header, as seen by the compiler (__FILE__).
+    const char * getHeaderFilePath() const override
+    {
+        return __FILE__;
+    }
+
+    /// Adds row `row_num` of the two argument columns to the state stored at `place`.
+    /// columns[0] is treated as ColumnVector<X> and columns[1] as ColumnVector<Y> (unchecked casts).
+    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    {
+        const auto & x_values = static_cast<const ColumnVector<X> &>(*columns[0]).getData();
+        const auto & y_values = static_cast<const ColumnVector<Y> &>(*columns[1]).getData();
+
+        const X x = x_values[row_num];
+        const Y y = y_values[row_num];
+
+        this->data(place).add(x, y);
+    }
+
+    /// Folds the state stored at `rhs` into the state stored at `place`.
+    /// `rhs` is not modified.
+    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    {
+        auto & target = this->data(place);
+        const auto & source = this->data(rhs);
+
+        target.merge(source);
+    }
+
+    /// Writes the state stored at `place` to `buf`.
+    /// The byte layout is defined by AggregateFunctionLeastSqrData::serialize.
+    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    {
+        const auto & state = this->data(place);
+        state.serialize(buf);
+    }
+
+    /// Reads a state from `buf` into `place`.
+    /// The byte layout is defined by AggregateFunctionLeastSqrData::deserialize.
+    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    {
+        auto & state = this->data(place);
+        state.deserialize(buf);
+    }
+
+    /// Result type: a tuple with the named elements `k` and `b`, both of numeric type Ret.
+    DataTypePtr getReturnType() const override
+    {
+        /// Two separate type objects, one per tuple element.
+        DataTypes element_types {
+            std::make_shared<DataTypeNumber<Ret>>(),
+            std::make_shared<DataTypeNumber<Ret>>(),
+        };
+        Strings element_names {"k", "b"};
+
+        return std::make_shared<DataTypeTuple>(std::move(element_types), std::move(element_names));
+    }
+
+    /// Appends the fitted (k, b) pair to the result column `to`, which is treated as a ColumnTuple
+    /// holding two ColumnVector<Ret> columns (the layout described by getReturnType()).
+    void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
+    {
+        /// The intercept depends on the slope, so k is computed first.
+        const auto & state = this->data(place);
+        const Ret k = state.getK();
+        const Ret b = state.getB(k);
+
+        auto & result = static_cast<ColumnTuple &>(to);
+        auto & k_column = static_cast<ColumnVector<Ret> &>(result.getColumn(0));
+        auto & b_column = static_cast<ColumnVector<Ret> &>(result.getColumn(1));
+
+        k_column.getData().push_back(k);
+        b_column.getData().push_back(b);
+    }
+};
+
+}
--- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp
+++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp
@ -24,8 +24,7 @@ struct WithoutOverflowPolicy
    static DataTypePtr promoteType(const DataTypePtr & data_type)
    {
        if (!data_type->canBePromoted())
-            throw new Exception{"Values to be summed are expected to be Numeric, Float or Decimal.",
-                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
+            throw Exception{"Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        return data_type->promoteNumericType();
    }
--- a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h
@ -16,7 +16,6 @@
 #include <Common/HashTable/HashSet.h>
 #include <Common/HyperLogLogWithSmallSetOptimization.h>
 #include <Common/CombinedCardinalityEstimator.h>
-#include <Common/MemoryTracker.h>
 #include <Common/typeid_cast.h>

 #include <AggregateFunctions/UniquesHashSet.h>
--- a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp
+++ b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp
@ -29,6 +29,7 @@ void registerAggregateFunctionsBitwise(AggregateFunctionFactory &);
 void registerAggregateFunctionsBitmap(AggregateFunctionFactory &);
 void registerAggregateFunctionsMaxIntersections(AggregateFunctionFactory &);
 void registerAggregateFunctionEntropy(AggregateFunctionFactory &);
+void registerAggregateFunctionLeastSqr(AggregateFunctionFactory &);

 void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
 void registerAggregateFunctionCombinatorArray(AggregateFunctionCombinatorFactory &);
@ -69,6 +70,7 @@ void registerAggregateFunctions()
        registerAggregateFunctionHistogram(factory);
        registerAggregateFunctionRetention(factory);
        registerAggregateFunctionEntropy(factory);
+        registerAggregateFunctionLeastSqr(factory);
    }

    {
--- a/dbms/src/Client/Connection.h
+++ b/dbms/src/Client/Connection.h
@ -271,7 +271,7 @@ private:
    void initBlockInput();
    void initBlockLogsInput();

-    void throwUnexpectedPacket(UInt64 packet_type, const char * expected) const;
+    [[noreturn]] void throwUnexpectedPacket(UInt64 packet_type, const char * expected) const;
 };

 }
--- a/dbms/src/Columns/ColumnString.h
+++ b/dbms/src/Columns/ColumnString.h
@ -21,6 +21,7 @@ namespace DB
 class ColumnString final : public COWPtrHelper<IColumn, ColumnString>
 {
 public:
+    using Char = UInt8;
    using Chars = PaddedPODArray<UInt8>;

 private:
--- a/dbms/src/Common/AlignedBuffer.h
+++ b/dbms/src/Common/AlignedBuffer.h
@ -10,7 +10,7 @@ namespace DB

 /** Aligned piece of memory.
  * It can only be allocated and destroyed.
-  * MemoryTracker is not used. It is intended for small pieces of memory.
+  * MemoryTracker is not used. AlignedBuffer is intended for small pieces of memory.
  */
 class AlignedBuffer : private boost::noncopyable
 {
--- a/dbms/src/Common/Allocator.cpp
+++ b/dbms/src/Common/Allocator.cpp
@ -1,190 +0,0 @@
-#include <Common/Allocator.h>
-
-#if !defined(__APPLE__) && !defined(__FreeBSD__)
-#include <malloc.h>
-#endif
-
-#include <cstdlib>
-#include <algorithm>
-#include <sys/mman.h>
-
-#include <Core/Defines.h>
-#ifdef THREAD_SANITIZER
-    /// Thread sanitizer does not intercept mremap. The usage of mremap will lead to false positives.
-    #define DISABLE_MREMAP 1
-#endif
-#include <common/mremap.h>
-
-#include <Common/MemoryTracker.h>
-#include <Common/Exception.h>
-#include <Common/formatReadable.h>
-#include <IO/WriteHelpers.h>
-
-
-/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-    extern const int CANNOT_ALLOCATE_MEMORY;
-    extern const int CANNOT_MUNMAP;
-    extern const int CANNOT_MREMAP;
-}
-}
-
-
-/** Many modern allocators (for example, tcmalloc) do not do a mremap for realloc,
-  *  even in case of large enough chunks of memory.
-  * Although this allows you to increase performance and reduce memory consumption during realloc.
-  * To fix this, we do mremap manually if the chunk of memory is large enough.
-  * The threshold (64 MB) is chosen quite large, since changing the address space is
-  *  very slow, especially in the case of a large number of threads.
-  * We expect that the set of operations mmap/something to do/mremap can only be performed about 1000 times per second.
-  *
-  * PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB.
-  */
-#ifdef NDEBUG
-    static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
-#else
-    /// In debug build, use small mmap threshold to reproduce more memory stomping bugs.
-    /// Along with ASLR it will hopefully detect more issues than ASan.
-    /// The program may fail due to the limit on number of memory mappings.
-    static constexpr size_t MMAP_THRESHOLD = 4096;
-#endif
-
-static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
-static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
-
-
-template <bool clear_memory_>
-void * Allocator<clear_memory_>::mmap_hint()
-{
-#if ALLOCATOR_ASLR
-    return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(rng));
-#else
-    return nullptr;
-#endif
-}
-
-
-template <bool clear_memory_>
-void * Allocator<clear_memory_>::alloc(size_t size, size_t alignment)
-{
-    CurrentMemoryTracker::alloc(size);
-
-    void * buf;
-
-    if (size >= MMAP_THRESHOLD)
-    {
-        if (alignment > MMAP_MIN_ALIGNMENT)
-            throw DB::Exception("Too large alignment " + formatReadableSizeWithBinarySuffix(alignment) + ": more than page size when allocating "
-                + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::BAD_ARGUMENTS);
-
-        buf = mmap(mmap_hint(), size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-        if (MAP_FAILED == buf)
-            DB::throwFromErrno("Allocator: Cannot mmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
-
-        /// No need for zero-fill, because mmap guarantees it.
-    }
-    else
-    {
-        if (alignment <= MALLOC_MIN_ALIGNMENT)
-        {
-            if (clear_memory)
-                buf = ::calloc(size, 1);
-            else
-                buf = ::malloc(size);
-
-            if (nullptr == buf)
-                DB::throwFromErrno("Allocator: Cannot malloc " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
-        }
-        else
-        {
-            buf = nullptr;
-            int res = posix_memalign(&buf, alignment, size);
-
-            if (0 != res)
-                DB::throwFromErrno("Cannot allocate memory (posix_memalign) " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
-
-            if (clear_memory)
-                memset(buf, 0, size);
-        }
-    }
-
-    return buf;
-}
-
-
-template <bool clear_memory_>
-void Allocator<clear_memory_>::free(void * buf, size_t size)
-{
-    if (size >= MMAP_THRESHOLD)
-    {
-        if (0 != munmap(buf, size))
-            DB::throwFromErrno("Allocator: Cannot munmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_MUNMAP);
-    }
-    else
-    {
-        ::free(buf);
-    }
-
-    CurrentMemoryTracker::free(size);
-}
-
-
-template <bool clear_memory_>
-void * Allocator<clear_memory_>::realloc(void * buf, size_t old_size, size_t new_size, size_t alignment)
-{
-    if (old_size == new_size)
-    {
-        /// nothing to do.
-        /// BTW, it's not possible to change alignment while doing realloc.
-    }
-    else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD && alignment <= MALLOC_MIN_ALIGNMENT)
-    {
-        /// Resize malloc'd memory region with no special alignment requirement.
-        CurrentMemoryTracker::realloc(old_size, new_size);
-
-        void * new_buf = ::realloc(buf, new_size);
-        if (nullptr == new_buf)
-            DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
-
-        buf = new_buf;
-        if (clear_memory && new_size > old_size)
-            memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
-    }
-    else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
-    {
-        /// Resize mmap'd memory region.
-        CurrentMemoryTracker::realloc(old_size, new_size);
-
-        // On apple and freebsd self-implemented mremap used (common/mremap.h)
-        buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-        if (MAP_FAILED == buf)
-            DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP);
-
-        /// No need for zero-fill, because mmap guarantees it.
-    }
-    else
-    {
-        /// All other cases that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
-
-        void * new_buf = alloc(new_size, alignment);
-        memcpy(new_buf, buf, std::min(old_size, new_size));
-        free(buf, old_size);
-        buf = new_buf;
-    }
-
-    return buf;
-}
-
-
-/// Explicit template instantiations.
-template class Allocator<true>;
-template class Allocator<false>;
--- a/dbms/src/Common/Allocator.h
+++ b/dbms/src/Common/Allocator.h
@ -10,11 +10,88 @@
    #define ALLOCATOR_ASLR 1
 #endif

-#if ALLOCATOR_ASLR
-    #include <pcg_random.hpp>
-    #include <Common/randomSeed.h>
+#include <pcg_random.hpp>
+#include <Common/randomSeed.h>
+
+#if !defined(__APPLE__) && !defined(__FreeBSD__)
+#include <malloc.h>
 #endif

+#include <cstdlib>
+#include <algorithm>
+#include <sys/mman.h>
+
+#include <Core/Defines.h>
+#ifdef THREAD_SANITIZER
+    /// Thread sanitizer does not intercept mremap. The usage of mremap will lead to false positives.
+    #define DISABLE_MREMAP 1
+#endif
+#include <common/mremap.h>
+
+#include <Common/MemoryTracker.h>
+#include <Common/Exception.h>
+#include <Common/formatReadable.h>
+
+
+/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+
+/** Many modern allocators (for example, tcmalloc) do not do a mremap for realloc,
+  *  even in case of large enough chunks of memory.
+  * Although this allows you to increase performance and reduce memory consumption during realloc.
+  * To fix this, we do mremap manually if the chunk of memory is large enough.
+  * The threshold (64 MB) is chosen quite large, since changing the address space is
+  *  very slow, especially in the case of a large number of threads.
+  * We expect that the set of operations mmap/something to do/mremap can only be performed about 1000 times per second.
+  *
+  * PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB.
+  */
+#ifdef NDEBUG
+    static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
+#else
+    /// In debug build, use small mmap threshold to reproduce more memory stomping bugs.
+    /// Along with ASLR it will hopefully detect more issues than ASan.
+    /// The program may fail due to the limit on number of memory mappings.
+    static constexpr size_t MMAP_THRESHOLD = 4096;
+#endif
+
+static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
+static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+    extern const int CANNOT_ALLOCATE_MEMORY;
+    extern const int CANNOT_MUNMAP;
+    extern const int CANNOT_MREMAP;
+}
+}
+
+namespace AllocatorHints
+{
+/// Hint policy whose mmap_hint() always returns nullptr.
+struct DefaultHint
+{
+    void * mmap_hint()
+    {
+        return nullptr;
+    }
+};
+
+/// Hint policy whose mmap_hint() returns a fresh random address on every call,
+/// drawn uniformly from the range [0x100000000000, 0x700000000000].
+struct RandomHint
+{
+    void * mmap_hint()
+    {
+        return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(rng));
+    }
+private:
+    /// Generator owned by this object; seeded with randomSeed() when the object is constructed.
+    pcg64 rng{randomSeed()};
+};
+}

 /** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
  * Also used in hash tables.
@ -23,31 +100,126 @@
  * - passing the size into the `free` method;
  * - by the presence of the `alignment` argument;
  * - the possibility of zeroing memory (used in hash tables);
+  * - a hint class that supplies the address hint passed to mmap;
+  * - an mmap_threshold parameter: allocations of at least this size are served by mmap.
  */
-template <bool clear_memory_>
-class Allocator
+template <bool clear_memory_, typename Hint, size_t mmap_threshold>
+class AllocatorWithHint : Hint
 {
-#if ALLOCATOR_ASLR
-private:
-    pcg64 rng{randomSeed()};
-#endif
-    void * mmap_hint();
-
 protected:
    static constexpr bool clear_memory = clear_memory_;

 public:
    /// Allocate memory range.
-    void * alloc(size_t size, size_t alignment = 0);
+    /** Reports `size` bytes to CurrentMemoryTracker, then allocates them.
+      * - size >= mmap_threshold: anonymous private mmap, using the address hint from Hint::mmap_hint();
+      *   an alignment above MMAP_MIN_ALIGNMENT is rejected with BAD_ARGUMENTS.
+      * - otherwise, alignment <= MALLOC_MIN_ALIGNMENT: calloc (if clear_memory) or malloc.
+      * - otherwise: posix_memalign, followed by zero-filling if clear_memory.
+      * Allocation failures are reported through DB::throwFromErrno with CANNOT_ALLOCATE_MEMORY.
+      */
+    void * alloc(size_t size, size_t alignment = 0)
+    {
+        CurrentMemoryTracker::alloc(size);
+
+        void * buf;
+
+        if (size >= mmap_threshold)
+        {
+            if (alignment > MMAP_MIN_ALIGNMENT)
+                throw DB::Exception("Too large alignment " + formatReadableSizeWithBinarySuffix(alignment) + ": more than page size when allocating "
+                    + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::BAD_ARGUMENTS);
+
+            buf = mmap(Hint::mmap_hint(), size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+            if (MAP_FAILED == buf)
+                DB::throwFromErrno("Allocator: Cannot mmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+
+            /// No need for zero-fill, because mmap guarantees it.
+        }
+        else
+        {
+            if (alignment <= MALLOC_MIN_ALIGNMENT)
+            {
+                if constexpr (clear_memory)
+                    buf = ::calloc(size, 1);
+                else
+                    buf = ::malloc(size);
+
+                if (nullptr == buf)
+                    DB::throwFromErrno("Allocator: Cannot malloc " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+            }
+            else
+            {
+                buf = nullptr;
+                int res = posix_memalign(&buf, alignment, size);
+
+                /// posix_memalign returns the error code instead of setting errno, so pass it explicitly.
+                if (0 != res)
+                    DB::throwFromErrno("Cannot allocate memory (posix_memalign) " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
+
+                /// clear_memory is a compile-time constant; test it the same way as on the malloc path above.
+                if constexpr (clear_memory)
+                    memset(buf, 0, size);
+            }
+        }
+        return buf;
+    }

    /// Free memory range.
-    void free(void * buf, size_t size);
+    /** Releases `buf` and then reports `size` freed bytes to CurrentMemoryTracker.
+      * `size` selects the release path (munmap for size >= mmap_threshold, ::free otherwise),
+      * so it presumably has to be the size the range currently has - verify against callers.
+      * A failing munmap is reported through DB::throwFromErrno with CANNOT_MUNMAP.
+      */
+    void free(void * buf, size_t size)
+    {
+        if (size >= mmap_threshold)
+        {
+            if (0 != munmap(buf, size))
+                DB::throwFromErrno("Allocator: Cannot munmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_MUNMAP);
+        }
+        else
+        {
+            ::free(buf);
+        }
+
+        CurrentMemoryTracker::free(size);
+    }

    /** Enlarge memory range.
      * Data from old range is moved to the beginning of new range.
      * Address of memory range could change.
      */
-    void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0);
+    /** Resizes a range from `old_size` to `new_size` bytes and returns its (possibly new) address.
+      * Four cases, chosen by comparing both sizes with mmap_threshold:
+      * - equal sizes: the buffer is returned untouched;
+      * - both below the threshold and alignment <= MALLOC_MIN_ALIGNMENT: ::realloc;
+      * - both at or above the threshold: clickhouse_mremap;
+      * - everything else: alloc + memcpy + free.
+      */
+    void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0)
+    {
+        if (old_size == new_size)
+        {
+            /// nothing to do.
+            /// BTW, it's not possible to change alignment while doing realloc.
+        }
+        else if (old_size < mmap_threshold && new_size < mmap_threshold && alignment <= MALLOC_MIN_ALIGNMENT)
+        {
+            /// Resize malloc'd memory region with no special alignment requirement.
+            CurrentMemoryTracker::realloc(old_size, new_size);
+
+            void * new_buf = ::realloc(buf, new_size);
+            if (nullptr == new_buf)
+                DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+
+            buf = new_buf;
+            /// ::realloc does not zero the added tail, so clear it here when zeroed memory was requested.
+            if (clear_memory && new_size > old_size)
+                memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
+        }
+        else if (old_size >= mmap_threshold && new_size >= mmap_threshold)
+        {
+            /// Resize mmap'd memory region.
+            CurrentMemoryTracker::realloc(old_size, new_size);
+
+            // On apple and freebsd self-implemented mremap used (common/mremap.h)
+            buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+            if (MAP_FAILED == buf)
+                DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP);
+
+            /// No need for zero-fill, because mmap guarantees it.
+        }
+        else
+        {
+            /// All other cases that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
+
+            void * new_buf = alloc(new_size, alignment);
+            /// Copy only the bytes that exist in both the old and the new range.
+            memcpy(new_buf, buf, std::min(old_size, new_size));
+            free(buf, old_size);
+            buf = new_buf;
+        }
+
+        return buf;
+    }

 protected:
    static constexpr size_t getStackThreshold()
@ -56,6 +228,13 @@ protected:
    }
 };

+/// `Allocator<clear_memory>` is AllocatorWithHint with the MMAP_THRESHOLD threshold;
+/// the mmap hint is RandomHint when ALLOCATOR_ASLR is enabled and DefaultHint otherwise.
+#if ALLOCATOR_ASLR
+template <bool clear_memory>
+using Allocator = AllocatorWithHint<clear_memory, AllocatorHints::RandomHint, MMAP_THRESHOLD>;
+#else
+template <bool clear_memory>
+using Allocator = AllocatorWithHint<clear_memory, AllocatorHints::DefaultHint, MMAP_THRESHOLD>;
+#endif

 /** When using AllocatorWithStackMemory, located on the stack,
  *  GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.
--- a/dbms/src/Common/Arena.h
+++ b/dbms/src/Common/Arena.h
@ -49,7 +49,7 @@ private:
            ProfileEvents::increment(ProfileEvents::ArenaAllocChunks);
            ProfileEvents::increment(ProfileEvents::ArenaAllocBytes, size_);

-            begin = reinterpret_cast<char *>(Allocator::alloc(size_));
+            begin = reinterpret_cast<char *>(Allocator<false>::alloc(size_));
            pos = begin;
            end = begin + size_ - pad_right;
            prev = prev_;
@ -57,7 +57,7 @@ private:

        ~Chunk()
        {
-            Allocator::free(begin, size());
+            Allocator<false>::free(begin, size());

            if (prev)
                delete prev;
--- a/dbms/src/Common/ArenaWithFreeLists.h
+++ b/dbms/src/Common/ArenaWithFreeLists.h
@ -55,7 +55,7 @@ public:
    char * alloc(const size_t size)
    {
        if (size > max_fixed_block_size)
-            return static_cast<char *>(Allocator::alloc(size));
+            return static_cast<char *>(Allocator<false>::alloc(size));

        /// find list of required size
        const auto list_idx = findFreeListIndex(size);
@ -76,7 +76,7 @@ public:
    void free(char * ptr, const size_t size)
    {
        if (size > max_fixed_block_size)
-            return Allocator::free(ptr, size);
+            return Allocator<false>::free(ptr, size);

        /// find list of required size
        const auto list_idx = findFreeListIndex(size);
--- a/dbms/src/Common/ErrorCodes.cpp
+++ b/dbms/src/Common/ErrorCodes.cpp
@ -422,6 +422,10 @@ namespace ErrorCodes
    extern const int CANNOT_MPROTECT = 445;
    extern const int FUNCTION_NOT_ALLOWED = 446;
    extern const int HYPERSCAN_CANNOT_SCAN_TEXT = 447;
+    extern const int BROTLI_READ_FAILED = 448;
+    extern const int BROTLI_WRITE_FAILED = 449;
+    extern const int BAD_TTL_EXPRESSION = 450;
+    extern const int BAD_TTL_FILE = 451;

    extern const int KEEPER_EXCEPTION = 999;
    extern const int POCO_EXCEPTION = 1000;
--- a/dbms/src/Common/Exception.cpp
+++ b/dbms/src/Common/Exception.cpp
@ -21,11 +21,6 @@ namespace ErrorCodes
    extern const int CANNOT_TRUNCATE_FILE;
 }

-const char * getVersion()
-{
-    return VERSION_STRING;
-}
-
 std::string errnoToString(int code, int e)
 {
    const size_t buf_size = 128;
@ -82,14 +77,15 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
    }
    catch (const Exception & e)
    {
-        stream << "(version " << getVersion() << ") " << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace);
+        stream << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace) << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
    }
    catch (const Poco::Exception & e)
    {
        try
        {
-            stream << "(version " << getVersion() << ") " << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
-                << ", e.displayText() = " << e.displayText();
+            stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
+                << ", e.displayText() = " << e.displayText()
+                << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
        }
        catch (...) {}
    }
@ -103,7 +99,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
            if (status)
                name += " (demangling status: " + toString(status) + ")";

-            stream << "(version " << getVersion() << ") " << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what();
+            stream << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what() << ", version = " << VERSION_STRING << VERSION_OFFICIAL;
        }
        catch (...) {}
    }
@ -117,7 +113,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
            if (status)
                name += " (demangling status: " + toString(status) + ")";

-            stream << "(version " << getVersion() << ") " << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name;
+            stream << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
        }
        catch (...) {}
    }
--- a/dbms/src/Common/IFactoryWithAliases.h
+++ b/dbms/src/Common/IFactoryWithAliases.h
@ -20,7 +20,7 @@ namespace ErrorCodes
 * template parameter is available as Creator
 */
 template <typename CreatorFunc>
-class IFactoryWithAliases
+class IFactoryWithAliases : public IHints<2, IFactoryWithAliases<CreatorFunc>>
 {
 protected:
    using Creator = CreatorFunc;
@ -76,7 +76,7 @@ public:
            throw Exception(factory_name + ": alias name '" + alias_name + "' is not unique", ErrorCodes::LOGICAL_ERROR);
    }

-    std::vector<String> getAllRegisteredNames() const
+    std::vector<String> getAllRegisteredNames() const override
    {
        std::vector<String> result;
        auto getter = [](const auto & pair) { return pair.first; };
@ -106,13 +106,7 @@ public:
        return aliases.count(name) || case_insensitive_aliases.count(name);
    }

-    std::vector<String> getHints(const String & name) const
-    {
-        static const auto registered_names = getAllRegisteredNames();
-        return prompter.getHints(name, registered_names);
-    }
-
-    virtual ~IFactoryWithAliases() {}
+    virtual ~IFactoryWithAliases() override {}

 private:
    using InnerMap = std::unordered_map<String, Creator>; // name -> creator
@ -127,13 +121,6 @@ private:

    /// Case insensitive aliases
    AliasMap case_insensitive_aliases;
-
-    /**
-      * prompter for names, if a person makes a typo for some function or type, it
-      * helps to find best possible match (in particular, edit distance is done like in clang
-      * (max edit distance is (typo.size() + 2) / 3)
-      */
-    NamePrompter</*MaxNumHints=*/2> prompter;
 };

 }
--- a/dbms/src/Common/LFAllocator.cpp
+++ b/dbms/src/Common/LFAllocator.cpp
@ -0,0 +1,53 @@
+#include <Common/config.h>
+
+#if USE_LFALLOC
+#include "LFAllocator.h"
+
+#include <cstring>
+#include <lf_allocX64.h>
+
+namespace DB
+{
+
+/// Allocates `size` bytes: plain LFAlloc when no alignment is requested (alignment == 0),
+/// LFPosixMemalign otherwise. Returns nullptr if the aligned allocation reports an error.
+void * LFAllocator::alloc(size_t size, size_t alignment)
+{
+    if (alignment == 0)
+        return LFAlloc(size);
+
+    void * aligned = nullptr;
+    if (LFPosixMemalign(&aligned, alignment, size) != 0)
+        return nullptr;
+
+    return aligned;
+}
+
+/// Releases `buf` with LFFree. The size argument is accepted but not used.
+void LFAllocator::free(void * buf, size_t)
+{
+    LFFree(buf);
+}
+
+/** Resizes by allocating a new range, copying and releasing the old one.
+  * - old_ptr == nullptr: behaves like alloc(new_size, alignment).
+  * - new_size == 0: releases old_ptr and returns nullptr.
+  * - if the new allocation fails, nullptr is returned and old_ptr is left untouched.
+  * The caller-supplied old size is ignored; the real one is taken from LFGetSize.
+  */
+void * LFAllocator::realloc(void * old_ptr, size_t, size_t new_size, size_t alignment)
+{
+    if (!old_ptr)
+        return LFAllocator::alloc(new_size, alignment);
+
+    if (new_size == 0)
+    {
+        LFFree(old_ptr);
+        return nullptr;
+    }
+
+    void * fresh = LFAllocator::alloc(new_size, alignment);
+    if (!fresh)
+        return nullptr;
+
+    /// Copy only as many bytes as fit into both the old and the new range.
+    size_t bytes_to_copy = LFGetSize(old_ptr);
+    if (new_size < bytes_to_copy)
+        bytes_to_copy = new_size;
+
+    memcpy(fresh, old_ptr, bytes_to_copy);
+    LFFree(old_ptr);
+    return fresh;
+}
+
+}
+
+#endif
--- a/dbms/src/Common/LFAllocator.h
+++ b/dbms/src/Common/LFAllocator.h
@ -0,0 +1,22 @@
+#pragma once
+
+#include <Common/config.h>
+
+#if !USE_LFALLOC
+#error "do not include this file until USE_LFALLOC is set to 1"
+#endif
+
+#include <cstddef>
+
+namespace DB
+{
+/// Static allocation interface implemented in LFAllocator.cpp on top of the lfalloc functions.
+struct LFAllocator
+{
+    /// Allocate `size` bytes; alignment == 0 means no explicit alignment is requested.
+    static void * alloc(size_t size, size_t alignment = 0);
+
+    /// Release `buf`; the second (size) argument is unnamed.
+    static void free(void * buf, size_t);
+
+    /// Resize `buf` to `new_size` bytes; the second (old size) argument is unnamed.
+    static void * realloc(void * buf, size_t, size_t new_size, size_t alignment = 0);
+};
+
+}
--- a/dbms/src/Common/NamePrompter.h
+++ b/dbms/src/Common/NamePrompter.h
@ -97,4 +97,23 @@ private:
    }
 };

+/** Base class that adds name suggestions to a registry of names.
+  * A derived class implements getAllRegisteredNames(); getHints() hands those names,
+  * together with the requested name, to NamePrompter<MaxNumHints>.
+  * The Self parameter is not referenced inside this class.
+  */
+template <size_t MaxNumHints, class Self>
+class IHints
+{
+public:
+
+    /// All names currently known to the derived class.
+    virtual std::vector<String> getAllRegisteredNames() const = 0;
+
+    /// Suggestions for `name`, computed against the names registered at the time of the call.
+    std::vector<String> getHints(const String & name) const
+    {
+        /// Deliberately not cached in a function-local static: such a cache is filled by the first
+        /// call only and is shared by every object of this instantiation, so names registered
+        /// later (or belonging to another object) would never be suggested.
+        const auto registered_names = getAllRegisteredNames();
+        return prompter.getHints(name, registered_names);
+    }
+
+    virtual ~IHints() = default;
+
+private:
+    NamePrompter<MaxNumHints> prompter;
+};
+
 }
--- a/dbms/src/Common/SortedLookupPODArray.h
+++ b/dbms/src/Common/SortedLookupPODArray.h
@ -1,48 +0,0 @@
-#pragma once
-
-#include <Common/PODArray.h>
-
-namespace DB
-{
-
-/**
- * This class is intended to push sortable data into.
- * When looking up values the container ensures that it is sorted for log(N) lookup
- *
- * Note, this is only efficient when the insertions happen in one stage, followed by all retrievals
- * This way the data only gets sorted once.
- */
-
-template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>>
-class SortedLookupPODArray : private PaddedPODArray<T, INITIAL_SIZE, TAllocator>
-{
-public:
-    using Base = PaddedPODArray<T, INITIAL_SIZE, TAllocator>;
-    using typename Base::PODArray;
-    using Base::cbegin;
-    using Base::cend;
-
-    template <typename U, typename ... TAllocatorParams>
-    void insert(U && x, TAllocatorParams &&... allocator_params)
-    {
-        Base::push_back(std::forward<U>(x), std::forward<TAllocatorParams>(allocator_params)...);
-        sorted = false;
-    }
-
-    typename Base::const_iterator upper_bound (const T& k)
-    {
-        if (!sorted)
-            this->sort();
-        return std::upper_bound(this->cbegin(), this->cend(), k);
-    }
-private:
-    void sort()
-    {
-        std::sort(this->begin(), this->end());
-        sorted = true;
-    }
-
-    bool sorted = false;
-};
-
-}
--- a/dbms/src/Common/config.h.in
+++ b/dbms/src/Common/config.h.in
@ -25,6 +25,8 @@
 #cmakedefine01 USE_BROTLI
 #cmakedefine01 USE_SSL
 #cmakedefine01 USE_HYPERSCAN
+#cmakedefine01 USE_LFALLOC
+#cmakedefine01 USE_LFALLOC_RANDOM_HINT

 #cmakedefine01 CLICKHOUSE_SPLIT_BINARY
 #cmakedefine01 LLVM_HAS_RTTI
--- a/dbms/src/Common/config_version.h.in
+++ b/dbms/src/Common/config_version.h.in
@ -20,6 +20,7 @@
 #cmakedefine VERSION_MINOR @VERSION_MINOR@
 #cmakedefine VERSION_PATCH @VERSION_PATCH@
 #cmakedefine VERSION_STRING "@VERSION_STRING@"
+#cmakedefine VERSION_OFFICIAL "@VERSION_OFFICIAL@"
 #cmakedefine VERSION_FULL "@VERSION_FULL@"
 #cmakedefine VERSION_DESCRIBE "@VERSION_DESCRIBE@"
 #cmakedefine VERSION_GITHASH "@VERSION_GITHASH@"
@ -42,3 +43,7 @@
 #else
 #define DBMS_VERSION_PATCH 0
 #endif
+
+#if !defined(VERSION_OFFICIAL)
+#   define VERSION_OFFICIAL ""
+#endif
--- a/dbms/src/Compression/CachedCompressedReadBuffer.cpp
+++ b/dbms/src/Compression/CachedCompressedReadBuffer.cpp
@ -35,7 +35,7 @@ bool CachedCompressedReadBuffer::nextImpl()
    UInt128 key = cache->hash(path, file_pos);
    owned_cell = cache->get(key);

-    if (!owned_cell || !codec)
+    if (!owned_cell)
    {
        /// If not, read it from the file.
        initInput();
@ -49,21 +49,22 @@ bool CachedCompressedReadBuffer::nextImpl()

        if (owned_cell->compressed_size)
        {
-            owned_cell->data.resize(size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer());
+            owned_cell->additional_bytes = codec->getAdditionalSizeAtTheEndOfBuffer();
+            owned_cell->data.resize(size_decompressed + owned_cell->additional_bytes);
            decompress(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum);

-            /// Put data into cache.
-            cache->set(key, owned_cell);
        }
+
+        /// Put data into cache.
+        /// NOTE: We do this even if nothing was read (compressed_size == 0),
+        /// because the cached entry can be reused later without reopening the file.
+        cache->set(key, owned_cell);
    }

    if (owned_cell->data.size() == 0)
-    {
-        owned_cell = nullptr;
        return false;
-    }

-    working_buffer = Buffer(owned_cell->data.data(), owned_cell->data.data() + owned_cell->data.size() - codec->getAdditionalSizeAtTheEndOfBuffer());
+    working_buffer = Buffer(owned_cell->data.data(), owned_cell->data.data() + owned_cell->data.size() - owned_cell->additional_bytes);

    file_pos += owned_cell->compressed_size;

--- a/dbms/src/Compression/CompressionCodecDelta.cpp
+++ b/dbms/src/Compression/CompressionCodecDelta.cpp
@ -125,19 +125,34 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_
    }
 }

+namespace
+{
+
+/// Picks the delta width in bytes for a column type: the in-memory value size when the type
+/// reports a maximum value size and that size is 1, 2, 4 or 8; otherwise 1.
+UInt8 getDeltaBytesSize(DataTypePtr column_type)
+{
+    if (!column_type || !column_type->haveMaximumSizeOfValue())
+        return 1;
+
+    const size_t value_size = column_type->getSizeOfValueInMemory();
+    switch (value_size)
+    {
+        case 1:
+        case 2:
+        case 4:
+        case 8:
+            return static_cast<UInt8>(value_size);
+        default:
+            return 1;
+    }
+}
+
+}
+
+/// Recomputes delta_bytes_size from the given column type via getDeltaBytesSize,
+/// overwriting the value the codec held before.
+void CompressionCodecDelta::useInfoAboutType(DataTypePtr data_type)
+{
+    delta_bytes_size = getDeltaBytesSize(data_type);
+}
+
 void registerCodecDelta(CompressionCodecFactory & factory)
 {
    UInt8 method_code = UInt8(CompressionMethodByte::Delta);
    factory.registerCompressionCodecWithType("Delta", method_code, [&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr
    {
-        UInt8 delta_bytes_size = 1;
-        if (column_type && column_type->haveMaximumSizeOfValue())
-        {
-            size_t max_size = column_type->getSizeOfValueInMemory();
-            if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8)
-                delta_bytes_size = static_cast<UInt8>(max_size);
-        }
-
+        UInt8 delta_bytes_size = getDeltaBytesSize(column_type);
        if (arguments && !arguments->children.empty())
        {
            if (arguments->children.size() > 1)
--- a/dbms/src/Compression/CompressionCodecDelta.h
+++ b/dbms/src/Compression/CompressionCodecDelta.h
@ -14,15 +14,18 @@ public:

    String getCodecDesc() const override;

+    void useInfoAboutType(DataTypePtr data_type) override;
+
 protected:
    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;

    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;

    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override { return uncompressed_size + 2; }
+
+
 private:
-    const UInt8 delta_bytes_size;
+    UInt8 delta_bytes_size;
 };

 }
-
--- a/dbms/src/Compression/CompressionCodecMultiple.cpp
+++ b/dbms/src/Compression/CompressionCodecMultiple.cpp
@ -21,16 +21,6 @@ extern const int CORRUPTED_DATA;
 CompressionCodecMultiple::CompressionCodecMultiple(Codecs codecs)
    : codecs(codecs)
 {
-    std::ostringstream ss;
-    for (size_t idx = 0; idx < codecs.size(); idx++)
-    {
-        if (idx != 0)
-            ss << ',' << ' ';
-
-        const auto codec = codecs[idx];
-        ss << codec->getCodecDesc();
-    }
-    codec_desc = ss.str();
 }

 UInt8 CompressionCodecMultiple::getMethodByte() const
@ -40,7 +30,16 @@ UInt8 CompressionCodecMultiple::getMethodByte() const

 String CompressionCodecMultiple::getCodecDesc() const
 {
-    return codec_desc;
+    /// Build the description on demand: the nested codecs' descriptions joined with ", ".
+    std::ostringstream description;
+    bool is_first = true;
+    for (const auto & codec : codecs)
+    {
+        if (!is_first)
+            description << ',' << ' ';
+        is_first = false;
+
+        description << codec->getCodecDesc();
+    }
+    return description.str();
 }

 UInt32 CompressionCodecMultiple::getMaxCompressedDataSize(UInt32 uncompressed_size) const
@ -79,6 +78,14 @@ UInt32 CompressionCodecMultiple::doCompressData(const char * source, UInt32 sour
    return 1 + codecs.size() + source_size;
 }

+/// Passes the column type on to every codec of the chain, in chain order,
+/// so that codecs which depend on the type can make use of it.
+void CompressionCodecMultiple::useInfoAboutType(DataTypePtr data_type)
+{
+    for (size_t idx = 0; idx < codecs.size(); ++idx)
+        codecs[idx]->useInfoAboutType(data_type);
+}
+
 void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const
 {
    UInt8 compression_methods_size = source[0];
--- a/dbms/src/Compression/CompressionCodecMultiple.h
+++ b/dbms/src/Compression/CompressionCodecMultiple.h
@ -17,6 +17,8 @@ public:

    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;

+    void useInfoAboutType(DataTypePtr data_type) override;
+
 protected:
    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;

@ -24,7 +26,6 @@ protected:

 private:
    Codecs codecs;
-    String codec_desc;

 };

--- a/dbms/src/Compression/ICompressionCodec.h
+++ b/dbms/src/Compression/ICompressionCodec.h
@ -58,6 +58,9 @@ public:
    /// Read method byte from compressed source
    static UInt8 readMethod(const char * source);

+    /// Some codecs may use information about column type which appears after codec creation.
+    /// The default implementation ignores the type; codecs that need it override this method.
+    virtual void useInfoAboutType(DataTypePtr /* data_type */) { }
+
 protected:

    /// Return size of compressed data without header
--- a/dbms/src/Core/BackgroundSchedulePool.cpp
+++ b/dbms/src/Core/BackgroundSchedulePool.cpp
@ -23,20 +23,21 @@ namespace DB
 class TaskNotification final : public Poco::Notification
 {
 public:
-    explicit TaskNotification(const BackgroundSchedulePool::TaskInfoPtr & task) : task(task) {}
+    explicit TaskNotification(const BackgroundSchedulePoolTaskInfoPtr & task) : task(task) {}
    void execute() { task->execute(); }

 private:
-    BackgroundSchedulePool::TaskInfoPtr task;
+    BackgroundSchedulePoolTaskInfoPtr task;
 };


-BackgroundSchedulePool::TaskInfo::TaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const TaskFunc & function_)
-    : pool(pool_) , log_name(log_name_) , function(function_)
+BackgroundSchedulePoolTaskInfo::BackgroundSchedulePoolTaskInfo(
+    BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_)
+    : pool(pool_), log_name(log_name_), function(function_)
 {
 }

-bool BackgroundSchedulePool::TaskInfo::schedule()
+bool BackgroundSchedulePoolTaskInfo::schedule()
 {
    std::lock_guard lock(schedule_mutex);

@ -47,7 +48,7 @@ bool BackgroundSchedulePool::TaskInfo::schedule()
    return true;
 }

-bool BackgroundSchedulePool::TaskInfo::scheduleAfter(size_t ms)
+bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t ms)
 {
    std::lock_guard lock(schedule_mutex);

@ -58,7 +59,7 @@ bool BackgroundSchedulePool::TaskInfo::scheduleAfter(size_t ms)
    return true;
 }

-void BackgroundSchedulePool::TaskInfo::deactivate()
+void BackgroundSchedulePoolTaskInfo::deactivate()
 {
    std::lock_guard lock_exec(exec_mutex);
    std::lock_guard lock_schedule(schedule_mutex);
@ -73,13 +74,13 @@ void BackgroundSchedulePool::TaskInfo::deactivate()
        pool.cancelDelayedTask(shared_from_this(), lock_schedule);
 }

-void BackgroundSchedulePool::TaskInfo::activate()
+void BackgroundSchedulePoolTaskInfo::activate()
 {
    std::lock_guard lock(schedule_mutex);
    deactivated = false;
 }

-bool BackgroundSchedulePool::TaskInfo::activateAndSchedule()
+bool BackgroundSchedulePoolTaskInfo::activateAndSchedule()
 {
    std::lock_guard lock(schedule_mutex);

@ -91,7 +92,7 @@ bool BackgroundSchedulePool::TaskInfo::activateAndSchedule()
    return true;
 }

-void BackgroundSchedulePool::TaskInfo::execute()
+void BackgroundSchedulePoolTaskInfo::execute()
 {
    Stopwatch watch;
    CurrentMetrics::Increment metric_increment{CurrentMetrics::BackgroundSchedulePoolTask};
@ -131,7 +132,7 @@ void BackgroundSchedulePool::TaskInfo::execute()
    }
 }

-void BackgroundSchedulePool::TaskInfo::scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock)
+void BackgroundSchedulePoolTaskInfo::scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock)
 {
    scheduled = true;

@ -145,7 +146,7 @@ void BackgroundSchedulePool::TaskInfo::scheduleImpl(std::lock_guard<std::mutex>
        pool.queue.enqueueNotification(new TaskNotification(shared_from_this()));
 }

-Coordination::WatchCallback BackgroundSchedulePool::TaskInfo::getWatchCallback()
+Coordination::WatchCallback BackgroundSchedulePoolTaskInfo::getWatchCallback()
 {
     return [t = shared_from_this()](const Coordination::WatchResponse &)
     {
--- a/dbms/src/Core/BackgroundSchedulePool.h
+++ b/dbms/src/Core/BackgroundSchedulePool.h
@ -20,6 +20,8 @@ namespace DB
 {

 class TaskNotification;
+class BackgroundSchedulePoolTaskInfo;
+class BackgroundSchedulePoolTaskHolder;


 /** Executes functions scheduled at a specific point in time.
@ -35,84 +37,14 @@ class TaskNotification;
 class BackgroundSchedulePool
 {
 public:
-    class TaskInfo;
+    friend class BackgroundSchedulePoolTaskInfo;
+
+    using TaskInfo = BackgroundSchedulePoolTaskInfo;
    using TaskInfoPtr = std::shared_ptr<TaskInfo>;
    using TaskFunc = std::function<void()>;
+    using TaskHolder = BackgroundSchedulePoolTaskHolder;
    using DelayedTasks = std::multimap<Poco::Timestamp, TaskInfoPtr>;

-    class TaskInfo : public std::enable_shared_from_this<TaskInfo>, private boost::noncopyable
-    {
-    public:
-        TaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const TaskFunc & function_);
-
-        /// Schedule for execution as soon as possible (if not already scheduled).
-        /// If the task was already scheduled with delay, the delay will be ignored.
-        bool schedule();
-
-        /// Schedule for execution after specified delay.
-        bool scheduleAfter(size_t ms);
-
-        /// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task.
-        void deactivate();
-
-        void activate();
-
-        /// Atomically activate task and schedule it for execution.
-        bool activateAndSchedule();
-
-        /// get Coordination::WatchCallback needed for notifications from ZooKeeper watches.
-        Coordination::WatchCallback getWatchCallback();
-
-    private:
-        friend class TaskNotification;
-        friend class BackgroundSchedulePool;
-
-        void execute();
-
-        void scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock);
-
-        BackgroundSchedulePool & pool;
-        std::string log_name;
-        TaskFunc function;
-
-        std::mutex exec_mutex;
-        std::mutex schedule_mutex;
-
-        /// Invariants:
-        /// * If deactivated is true then scheduled, delayed and executing are all false.
-        /// * scheduled and delayed cannot be true at the same time.
-        bool deactivated = false;
-        bool scheduled = false;
-        bool delayed = false;
-        bool executing = false;
-
-        /// If the task is scheduled with delay, points to element of delayed_tasks.
-        DelayedTasks::iterator iterator;
-    };
-
-    class TaskHolder
-    {
-    public:
-        TaskHolder() = default;
-        explicit TaskHolder(const TaskInfoPtr & task_info_) : task_info(task_info_) {}
-        TaskHolder(const TaskHolder & other) = delete;
-        TaskHolder(TaskHolder && other) noexcept = default;
-        TaskHolder & operator=(const TaskHolder & other) noexcept = delete;
-        TaskHolder & operator=(TaskHolder && other) noexcept = default;
-
-        ~TaskHolder()
-        {
-            if (task_info)
-                task_info->deactivate();
-        }
-
-        TaskInfo * operator->() { return task_info.get(); }
-        const TaskInfo * operator->() const { return task_info.get(); }
-
-    private:
-        TaskInfoPtr task_info;
-    };
-
    TaskHolder createTask(const std::string & log_name, const TaskFunc & function);

    size_t getNumberOfThreads() const { return size; }
@ -153,4 +85,81 @@ private:
    void attachToThreadGroup();
 };

+
+/** A single task of BackgroundSchedulePool.
+  * Declared at namespace scope; BackgroundSchedulePool::TaskInfo is an alias of this class.
+  * Instances hand out shared_from_this() (to the pool queue and to watch callbacks),
+  * so they must be owned by a std::shared_ptr.
+  */
+class BackgroundSchedulePoolTaskInfo : public std::enable_shared_from_this<BackgroundSchedulePoolTaskInfo>, private boost::noncopyable
+{
+public:
+    /// Stores the owning pool, the name used for logging and the function to run.
+    BackgroundSchedulePoolTaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_);
+
+    /// Schedule for execution as soon as possible (if not already scheduled).
+    /// If the task was already scheduled with delay, the delay will be ignored.
+    bool schedule();
+
+    /// Schedule for execution after specified delay.
+    bool scheduleAfter(size_t ms);
+
+    /// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task.
+    /// Takes exec_mutex first and schedule_mutex second.
+    void deactivate();
+
+    /// Clears the deactivated flag (under schedule_mutex); does not schedule the task by itself.
+    void activate();
+
+    /// Atomically activate task and schedule it for execution.
+    bool activateAndSchedule();
+
+    /// get Coordination::WatchCallback needed for notifications from ZooKeeper watches.
+    /// The returned callback keeps a shared_ptr to this task.
+    Coordination::WatchCallback getWatchCallback();
+
+private:
+    friend class TaskNotification;
+    friend class BackgroundSchedulePool;
+
+    /// Runs the task; invoked through TaskNotification::execute().
+    void execute();
+
+    /// Marks the task as scheduled and hands it over to the pool.
+    /// Takes the caller's lock on schedule_mutex as an argument.
+    void scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock);
+
+    BackgroundSchedulePool & pool;
+    std::string log_name;
+    BackgroundSchedulePool::TaskFunc function;
+
+    /// Both are locked by deactivate(); the scheduling methods lock schedule_mutex only.
+    std::mutex exec_mutex;
+    std::mutex schedule_mutex;
+
+    /// Invariants:
+    /// * If deactivated is true then scheduled, delayed and executing are all false.
+    /// * scheduled and delayed cannot be true at the same time.
+    bool deactivated = false;
+    bool scheduled = false;
+    bool delayed = false;
+    bool executing = false;
+
+    /// If the task is scheduled with delay, points to element of delayed_tasks.
+    BackgroundSchedulePool::DelayedTasks::iterator iterator;
+};
+
+using BackgroundSchedulePoolTaskInfoPtr = std::shared_ptr<BackgroundSchedulePoolTaskInfo>;
+
+
+/// Movable, non-copyable handle for a task of BackgroundSchedulePool.
+/// Destroying a non-empty holder deactivates the task it refers to.
+class BackgroundSchedulePoolTaskHolder
+{
+public:
+    /// An empty holder refers to no task.
+    BackgroundSchedulePoolTaskHolder() = default;
+    explicit BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskInfoPtr & task_info_) : task_info(task_info_) {}
+
+    /// Not copyable.
+    BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskHolder &) = delete;
+    BackgroundSchedulePoolTaskHolder & operator=(const BackgroundSchedulePoolTaskHolder &) noexcept = delete;
+
+    /// Moving transfers the task pointer. Note that the defaulted move assignment only
+    /// releases the pointer previously held by the target; it does not call deactivate() on it.
+    BackgroundSchedulePoolTaskHolder(BackgroundSchedulePoolTaskHolder &&) noexcept = default;
+    BackgroundSchedulePoolTaskHolder & operator=(BackgroundSchedulePoolTaskHolder &&) noexcept = default;
+
+    ~BackgroundSchedulePoolTaskHolder()
+    {
+        if (task_info != nullptr)
+            task_info->deactivate();
+    }
+
+    /// Access to the task; yields nullptr for an empty holder.
+    BackgroundSchedulePoolTaskInfo * operator->() { return task_info.get(); }
+    const BackgroundSchedulePoolTaskInfo * operator->() const { return task_info.get(); }
+
+private:
+    BackgroundSchedulePoolTaskInfoPtr task_info;
+};
+
 }
--- a/dbms/src/DataStreams/AsynchronousBlockInputStream.h
+++ b/dbms/src/DataStreams/AsynchronousBlockInputStream.h
@ -5,8 +5,6 @@
 #include <DataStreams/IBlockInputStream.h>
 #include <Common/CurrentMetrics.h>
 #include <Common/ThreadPool.h>
-#include <Common/MemoryTracker.h>
-#include <Poco/Ext/ThreadNumber.h>


 namespace CurrentMetrics
--- a/dbms/src/DataStreams/BlockIO.h
+++ b/dbms/src/DataStreams/BlockIO.h
@ -43,6 +43,9 @@ struct BlockIO

    BlockIO & operator= (const BlockIO & rhs)
    {
+        if (this == &rhs)
+            return *this;
+
        out.reset();
        in.reset();
        process_list_entry.reset();
--- a/dbms/src/DataStreams/MarkInCompressedFile.h
+++ b/dbms/src/DataStreams/MarkInCompressedFile.h
@ -6,6 +6,10 @@
 #include <IO/WriteHelpers.h>
 #include <Common/PODArray.h>

+#include <Common/config.h>
+#if USE_LFALLOC
+#include <Common/LFAllocator.h>
+#endif

 namespace DB
 {
@ -33,7 +37,9 @@ struct MarkInCompressedFile
        return "(" + DB::toString(offset_in_compressed_file) + "," + DB::toString(offset_in_decompressed_block) + ")";
    }
 };
-
+#if USE_LFALLOC
+using MarksInCompressedFile = PODArray<MarkInCompressedFile, 4096, LFAllocator>;
+#else
 using MarksInCompressedFile = PODArray<MarkInCompressedFile>;
-
+#endif
 }
--- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
+++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
@ -1,7 +1,6 @@
 #include <future>
 #include <Common/setThreadName.h>
 #include <Common/CurrentMetrics.h>
-#include <Common/MemoryTracker.h>
 #include <DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h>
 #include <Common/CurrentThread.h>

--- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h
+++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h
@ -8,8 +8,6 @@
 #include <condition_variable>


-class MemoryTracker;
-
 namespace DB
 {

--- a/dbms/src/DataStreams/ParallelInputsProcessor.h
+++ b/dbms/src/DataStreams/ParallelInputsProcessor.h
@ -11,7 +11,6 @@
 #include <DataStreams/IBlockInputStream.h>
 #include <Common/setThreadName.h>
 #include <Common/CurrentMetrics.h>
-#include <Common/MemoryTracker.h>
 #include <Common/CurrentThread.h>
 #include <Common/ThreadPool.h>

--- a/dbms/src/DataStreams/TTLBlockInputStream.cpp
+++ b/dbms/src/DataStreams/TTLBlockInputStream.cpp
@ -0,0 +1,208 @@
+#include <DataStreams/TTLBlockInputStream.h>
+#include <DataTypes/DataTypeDate.h>
+#include <Interpreters/evaluateMissingDefaults.h>
+#include <Interpreters/SyntaxAnalyzer.h>
+#include <Interpreters/ExpressionAnalyzer.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+
+/// Wraps `input_` and prepares TTL bookkeeping for `data_part_`.
+///
+/// A snapshot of the part's current TTL infos is taken (old_ttl_infos) and new_ttl_infos is seeded from it:
+/// columns and the table TTL that have nothing expired at `current_time_` are carried over unchanged,
+/// everything else starts from scratch and is recomputed while reading.
+/// For columns that have something expired, their DEFAULT expressions (if any) are compiled
+/// into defaults_expression.
+TTLBlockInputStream::TTLBlockInputStream(
+    const BlockInputStreamPtr & input_,
+    const MergeTreeData & storage_,
+    const MergeTreeData::MutableDataPartPtr & data_part_,
+    time_t current_time_)
+    : storage(storage_)
+    , data_part(data_part_)
+    , current_time(current_time_)
+    , old_ttl_infos(data_part->ttl_infos)
+    , log(&Logger::get(storage.getLogName() + " (TTLBlockInputStream)"))
+    , date_lut(DateLUT::instance())
+{
+    children.push_back(input_);
+
+    const auto & column_defaults = storage.getColumns().getDefaults();
+    ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
+
+    for (const auto & [name, ttl_info] : old_ttl_infos.columns_ttl)
+    {
+        /// Nothing in this column has expired yet: keep its TTL info as is.
+        if (ttl_info.min > current_time)
+        {
+            new_ttl_infos.columns_ttl.emplace(name, ttl_info);
+            continue;
+        }
+
+        /// Something has expired: start with fresh TTL info and consider the column empty
+        /// until removeValuesWithExpiredColumnTTL meets a value that is still alive.
+        new_ttl_infos.columns_ttl.emplace(name, MergeTreeDataPart::TTLInfo{});
+        empty_columns.emplace(name);
+
+        /// Expired values are replaced with the column's DEFAULT expression when it has one.
+        const auto default_it = column_defaults.find(name);
+        if (default_it != column_defaults.end())
+            default_expr_list->children.emplace_back(
+                setAlias(default_it->second.expression, default_it->first));
+    }
+
+    /// Table TTL is carried over only if no row of the part has expired yet.
+    const bool table_ttl_untouched = old_ttl_infos.table_ttl.min > current_time;
+    if (table_ttl_untouched)
+        new_ttl_infos.table_ttl = old_ttl_infos.table_ttl;
+
+    if (default_expr_list->children.empty())
+        return;
+
+    auto syntax_result = SyntaxAnalyzer(storage.global_context).analyze(
+        default_expr_list, storage.getColumns().getAllPhysical());
+    defaults_expression = ExpressionAnalyzer{default_expr_list, syntax_result, storage.global_context}.getActions(true);
+}
+
+
+/// The header is the header of the wrapped input stream.
+Block TTLBlockInputStream::getHeader() const
+{
+    const auto & source = children.at(0);
+    return source->getHeader();
+}
+
+/// Reads the next block from the wrapped stream and applies table TTL and then column TTL to it.
+/// An empty block from the source is passed through unchanged.
+Block TTLBlockInputStream::readImpl()
+{
+    Block block = children.at(0)->read();
+
+    if (block)
+    {
+        if (storage.hasTableTTL())
+        {
+            const auto & table_ttl = old_ttl_infos.table_ttl;
+
+            /// Skip all data if table ttl is expired for part
+            if (table_ttl.max <= current_time)
+            {
+                rows_removed = data_part->rows_count;
+                return {};
+            }
+
+            /// Only some rows may have expired: filter them out one by one.
+            if (table_ttl.min <= current_time)
+                removeRowsWithExpiredTableTTL(block);
+        }
+
+        removeValuesWithExpiredColumnTTL(block);
+    }
+
+    return block;
+}
+
+/// Finalizes the recomputed TTL infos (part-level minimum over all column TTLs and the table TTL),
+/// stores them together with the set of empty columns into the data part and logs the number of removed rows.
+void TTLBlockInputStream::readSuffixImpl()
+{
+    for (const auto & name_and_ttl : new_ttl_infos.columns_ttl)
+        new_ttl_infos.updatePartMinTTL(name_and_ttl.second.min);
+
+    new_ttl_infos.updatePartMinTTL(new_ttl_infos.table_ttl.min);
+
+    data_part->ttl_infos = std::move(new_ttl_infos);
+    data_part->empty_columns = std::move(empty_columns);
+
+    if (!rows_removed)
+        return;
+
+    LOG_INFO(log, "Removed " << rows_removed << " rows with expired ttl from part " << data_part->name);
+}
+
+/// Drops from `block` every row whose table TTL has expired (ttl <= current_time).
+///
+/// The table TTL expression is evaluated on the block, then every physical column of the storage
+/// is rebuilt from the surviving rows and the block is replaced by the header filled with these columns.
+/// TTL values of surviving rows are folded into new_ttl_infos.table_ttl;
+/// every dropped row is added to rows_removed exactly once.
+///
+/// NOTE(review): the result is assembled from storage.getColumns().getNamesOfPhysical() but cloned
+/// into getHeader(); this presumably relies on both listing the same columns in the same order - confirm.
+void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block)
+{
+    storage.ttl_table_entry.expression->execute(block);
+
+    const auto & current = block.getByName(storage.ttl_table_entry.result_column);
+    const IColumn * ttl_column = current.column.get();
+    const size_t rows = block.rows();
+
+    /// Row accounting is done in its own pass over the TTL column. Doing it inside the per-column
+    /// loop below would count each removed row once per physical column.
+    for (size_t i = 0; i < rows; ++i)
+    {
+        UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
+        if (cur_ttl > current_time)
+            new_ttl_infos.table_ttl.update(cur_ttl);
+        else
+            ++rows_removed;
+    }
+
+    Block header = getHeader();
+
+    MutableColumns result_columns;
+    result_columns.reserve(header.columns());
+    for (const auto & name : storage.getColumns().getNamesOfPhysical())
+    {
+        auto & column_with_type = block.getByName(name);
+        const IColumn * values_column = column_with_type.column.get();
+        MutableColumnPtr result_column = values_column->cloneEmpty();
+        result_column->reserve(rows);
+
+        /// Copy only the rows that are still alive.
+        for (size_t i = 0; i < rows; ++i)
+        {
+            if (getTimestampByIndex(ttl_column, i) > current_time)
+                result_column->insertFrom(*values_column, i);
+        }
+        result_columns.emplace_back(std::move(result_column));
+    }
+
+    block = header.cloneWithColumns(std::move(result_columns));
+}
+
+/// Replaces values with expired column TTL by the column's default and recomputes
+/// the per-column TTL info and the set of empty columns.
+///
+/// For every column with a TTL entry whose values are only partially expired in this part,
+/// a new column is built row by row: expired values (ttl <= current_time) are replaced with
+/// the value of the DEFAULT expression if there is one, or with the type's default otherwise;
+/// live values are kept and their TTL is folded into new_ttl_infos.
+/// All TTL result columns are removed from the block at the end.
+void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
+{
+    /// Evaluate DEFAULT expressions on a copy, so that `block` itself does not get extra columns.
+    Block block_with_defaults;
+    if (defaults_expression)
+    {
+        block_with_defaults = block;
+        defaults_expression->execute(block_with_defaults);
+    }
+
+    for (const auto & [name, ttl_entry] : storage.ttl_entries_by_name)
+    {
+        /// NOTE(review): if columns_ttl is a standard map, operator[] creates a default entry for a missing name.
+        const auto & old_ttl_info = old_ttl_infos.columns_ttl[name];
+        auto & new_ttl_info = new_ttl_infos.columns_ttl[name];
+
+        /// Nothing has expired in this column yet.
+        if (old_ttl_info.min > current_time)
+            continue;
+
+        /// Everything has expired in this column: values are left untouched here.
+        /// The constructor has already put such a column into empty_columns; presumably
+        /// the consumer of data_part->empty_columns takes care of it - confirm.
+        if (old_ttl_info.max <= current_time)
+            continue;
+
+        /// Compute the TTL column unless it is already present in the block.
+        if (!block.has(ttl_entry.result_column))
+            ttl_entry.expression->execute(block);
+
+        ColumnPtr default_column = nullptr;
+        if (block_with_defaults.has(name))
+            default_column = block_with_defaults.getByName(name).column->convertToFullColumnIfConst();
+
+        auto & column_with_type = block.getByName(name);
+        const IColumn * values_column = column_with_type.column.get();
+        MutableColumnPtr result_column = values_column->cloneEmpty();
+        result_column->reserve(block.rows());
+
+        const auto & current = block.getByName(ttl_entry.result_column);
+        const IColumn * ttl_column = current.column.get();
+
+        for (size_t i = 0; i < block.rows(); ++i)
+        {
+            UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
+
+            if (cur_ttl <= current_time)
+            {
+                /// Expired value: substitute the default.
+                if (default_column)
+                    result_column->insertFrom(*default_column, i);
+                else
+                    result_column->insertDefault();
+            }
+            else
+            {
+                /// Live value: keep it, account its TTL and remember that the column is not empty.
+                new_ttl_info.update(cur_ttl);
+                empty_columns.erase(name);
+                result_column->insertFrom(*values_column, i);
+            }
+        }
+        column_with_type.column = std::move(result_column);
+    }
+
+    /// TTL result columns are temporary and must not leak into the output block.
+    for (const auto & elem : storage.ttl_entries_by_name)
+        if (block.has(elem.second.result_column))
+            block.erase(elem.second.result_column);
+}
+
+/// Returns the TTL value stored at row `ind` of `column` as a timestamp.
+/// A UInt16 column is treated as day numbers and converted through date_lut;
+/// a UInt32 column is returned as is. Any other column type is a logical error.
+UInt32 TTLBlockInputStream::getTimestampByIndex(const IColumn * column, size_t ind)
+{
+    if (const auto * date_column = typeid_cast<const ColumnUInt16 *>(column))
+        return date_lut.fromDayNum(DayNum(date_column->getData()[ind]));
+
+    if (const auto * date_time_column = typeid_cast<const ColumnUInt32 *>(column))
+        return date_time_column->getData()[ind];
+
+    throw Exception("Unexpected type of result ttl column", ErrorCodes::LOGICAL_ERROR);
+}
+
+}
--- a/dbms/src/DataStreams/TTLBlockInputStream.h
+++ b/dbms/src/DataStreams/TTLBlockInputStream.h
@ -0,0 +1,60 @@
+#pragma once
+#include <DataStreams/IBlockInputStream.h>
+#include <Storages/MergeTree/MergeTreeData.h>
+#include <Storages/MergeTree/MergeTreeDataPart.h>
+#include <Core/Block.h>
+
+#include <common/DateLUT.h>
+
+namespace DB
+{
+
+/** Applies TTL rules of a MergeTree table to the blocks read from the wrapped stream:
+  * rows with expired table TTL are removed, values with expired column TTL are replaced with defaults.
+  * While reading, new TTL infos and the set of empty columns are computed;
+  * they are written into the data part in readSuffixImpl().
+  */
+class TTLBlockInputStream : public IBlockInputStream
+{
+public:
+    /// input_        - stream to read from;
+    /// storage_      - table that provides TTL entries, column defaults and context;
+    /// data_part_    - part whose ttl_infos are read at construction and updated in readSuffixImpl();
+    /// current_time_ - moment against which TTL values are compared.
+    TTLBlockInputStream(
+        const BlockInputStreamPtr & input_,
+        const MergeTreeData & storage_,
+        const MergeTreeData::MutableDataPartPtr & data_part_,
+        time_t current_time_
+    );
+
+    String getName() const override { return "TTLBlockInputStream"; }
+
+    Block getHeader() const override;
+
+protected:
+    Block readImpl() override;
+
+    /// Finalizes ttl infos and updates data part
+    void readSuffixImpl() override;
+
+private:
+    const MergeTreeData & storage;
+
+    /// ttl_infos and empty_columns are updating while reading.
+    /// The pointer is held by value: a reference member would dangle
+    /// if the caller's shared_ptr is destroyed before the stream.
+    MergeTreeData::MutableDataPartPtr data_part;
+
+    time_t current_time;
+
+    /// Snapshot of the part's TTL infos taken at construction and the infos recomputed while reading.
+    MergeTreeDataPart::TTLInfos old_ttl_infos;
+    MergeTreeDataPart::TTLInfos new_ttl_infos;
+    /// Columns in which no live value has been met so far.
+    NameSet empty_columns;
+
+    size_t rows_removed = 0;
+    Logger * log;
+    /// Reference to the shared lookup table; storing it by value would copy the whole table for every stream.
+    const DateLUTImpl & date_lut;
+
+    /// NOTE(review): not referenced by TTLBlockInputStream.cpp; candidate for removal.
+    std::unordered_map<String, String> defaults_result_column;
+    /// Computes DEFAULT expressions of columns with expired values; null if there are none.
+    ExpressionActionsPtr defaults_expression;
+
+    /// Removes values with expired ttl and computes new min_ttl and empty_columns for part
+    void removeValuesWithExpiredColumnTTL(Block & block);
+
+    /// Remove rows with expired table ttl and computes new min_ttl for part
+    void removeRowsWithExpiredTableTTL(Block & block);
+
+    /// Returns the TTL value at row `ind` of a Date (UInt16) or DateTime (UInt32) column as a timestamp.
+    UInt32 getTimestampByIndex(const IColumn * column, size_t ind);
+};
+
+}
--- a/dbms/src/DataTypes/DataTypeDomainIPv4AndIPv6.cpp
+++ b/dbms/src/DataTypes/DataTypeDomainIPv4AndIPv6.cpp
@ -20,7 +20,7 @@ namespace ErrorCodes
 namespace
 {

-class DataTypeDomanIPv4 : public DataTypeDomainWithSimpleSerialization
+class DataTypeDomainIPv4 : public DataTypeDomainWithSimpleSerialization
 {
 public:
    const char * getName() const override
@ -63,7 +63,7 @@ public:
    }
 };

-class DataTypeDomanIPv6 : public DataTypeDomainWithSimpleSerialization
+class DataTypeDomainIPv6 : public DataTypeDomainWithSimpleSerialization
 {
 public:
    const char * getName() const override
@ -111,8 +111,8 @@ public:

 void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory)
 {
-    factory.registerDataTypeDomain("UInt32", std::make_unique<DataTypeDomanIPv4>());
-    factory.registerDataTypeDomain("FixedString(16)", std::make_unique<DataTypeDomanIPv6>());
+    factory.registerDataTypeDomain("UInt32", std::make_unique<DataTypeDomainIPv4>());
+    factory.registerDataTypeDomain("FixedString(16)", std::make_unique<DataTypeDomainIPv6>());
 }

 } // namespace DB
--- a/dbms/src/DataTypes/DataTypeLowCardinality.cpp
+++ b/dbms/src/DataTypes/DataTypeLowCardinality.cpp
@ -690,10 +690,9 @@ void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams(
    };

    if (!settings.continuous_reading)
+    {
        low_cardinality_state->num_pending_rows = 0;

-    if (!settings.continuous_reading)
-    {
        /// Remember in state that some granules were skipped and we need to update dictionary.
        low_cardinality_state->need_update_dictionary = true;
    }
--- a/dbms/src/DataTypes/IDataTypeDummy.h
+++ b/dbms/src/DataTypes/IDataTypeDummy.h
@ -14,7 +14,7 @@ namespace DB
 class IDataTypeDummy : public DataTypeWithSimpleSerialization
 {
 private:
-    void throwNoSerialization() const
+    [[noreturn]] void throwNoSerialization() const
    {
        throw Exception("Serialization is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }
--- a/dbms/src/Formats/ProtobufColumnMatcher.h
+++ b/dbms/src/Formats/ProtobufColumnMatcher.h
@ -77,7 +77,7 @@ namespace ProtobufColumnMatcher

    namespace details
    {
-        void throwNoCommonColumns();
+        [[noreturn]] void throwNoCommonColumns();

        class ColumnNameMatcher
        {
--- a/dbms/src/Formats/ProtobufReader.cpp
+++ b/dbms/src/Formats/ProtobufReader.cpp
@ -385,73 +385,61 @@ public:
    bool readStringInto(PaddedPODArray<UInt8> &) override
    {
        cannotConvertType("String");
-        return false;
    }

    bool readInt8(Int8 &) override
    {
        cannotConvertType("Int8");
-        return false;
    }

    bool readUInt8(UInt8 &) override
    {
        cannotConvertType("UInt8");
-        return false;
    }

    bool readInt16(Int16 &) override
    {
        cannotConvertType("Int16");
-        return false;
    }

    bool readUInt16(UInt16 &) override
    {
        cannotConvertType("UInt16");
-        return false;
    }

    bool readInt32(Int32 &) override
    {
        cannotConvertType("Int32");
-        return false;
    }

    bool readUInt32(UInt32 &) override
    {
        cannotConvertType("UInt32");
-        return false;
    }

    bool readInt64(Int64 &) override
    {
        cannotConvertType("Int64");
-        return false;
    }

    bool readUInt64(UInt64 &) override
    {
        cannotConvertType("UInt64");
-        return false;
    }

    bool readUInt128(UInt128 &) override
    {
        cannotConvertType("UInt128");
-        return false;
    }

    bool readFloat32(Float32 &) override
    {
        cannotConvertType("Float32");
-        return false;
    }

    bool readFloat64(Float64 &) override
    {
        cannotConvertType("Float64");
-        return false;
    }

    void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) override {}
@ -460,59 +448,50 @@ public:
    bool readEnum8(Int8 &) override
    {
        cannotConvertType("Enum");
-        return false;
    }

    bool readEnum16(Int16 &) override
    {
        cannotConvertType("Enum");
-        return false;
    }

    bool readUUID(UUID &) override
    {
        cannotConvertType("UUID");
-        return false;
    }

    bool readDate(DayNum &) override
    {
        cannotConvertType("Date");
-        return false;
    }

    bool readDateTime(time_t &) override
    {
        cannotConvertType("DateTime");
-        return false;
    }

    bool readDecimal32(Decimal32 &, UInt32, UInt32) override
    {
        cannotConvertType("Decimal32");
-        return false;
    }

    bool readDecimal64(Decimal64 &, UInt32, UInt32) override
    {
        cannotConvertType("Decimal64");
-        return false;
    }

    bool readDecimal128(Decimal128 &, UInt32, UInt32) override
    {
        cannotConvertType("Decimal128");
-        return false;
    }

    bool readAggregateFunction(const AggregateFunctionPtr &, AggregateDataPtr, Arena &) override
    {
        cannotConvertType("AggregateFunction");
-        return false;
    }

 protected:
-    void cannotConvertType(const String & type_name)
+    [[noreturn]] void cannotConvertType(const String & type_name)
    {
        throw Exception(
            String("Could not convert type '") + field->type_name() + "' from protobuf field '" + field->name() + "' to data type '"
@ -520,7 +499,7 @@ protected:
            ErrorCodes::PROTOBUF_BAD_CAST);
    }

-    void cannotConvertValue(const String & value, const String & type_name)
+    [[noreturn]] void cannotConvertValue(const String & value, const String & type_name)
    {
        throw Exception(
            "Could not convert value '" + value + "' from protobuf field '" + field->name() + "' to data type '" + type_name + "'",
@ -557,7 +536,6 @@ protected:
        catch (...)
        {
            cannotConvertValue(StringRef(str.data(), str.size()).toString(), TypeName<To>::get());
-            __builtin_unreachable();
        }
    }

--- a/dbms/src/Formats/ProtobufWriter.cpp
+++ b/dbms/src/Formats/ProtobufWriter.cpp
@ -334,14 +334,14 @@ public:
    virtual void writeAggregateFunction(const AggregateFunctionPtr &, ConstAggregateDataPtr) override { cannotConvertType("AggregateFunction"); }

 protected:
-    void cannotConvertType(const String & type_name)
+    [[noreturn]] void cannotConvertType(const String & type_name)
    {
        throw Exception(
            "Could not convert data type '" + type_name + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",
            ErrorCodes::PROTOBUF_BAD_CAST);
    }

-    void cannotConvertValue(const String & value)
+    [[noreturn]] void cannotConvertValue(const String & value)
    {
        throw Exception(
            "Could not convert value '" + value + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",
--- a/dbms/src/Functions/FunctionsConversion.h
+++ b/dbms/src/Functions/FunctionsConversion.h
@ -423,7 +423,7 @@ inline bool tryParseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, Read

 /** Throw exception with verbose message when string value is not parsed completely.
  */
-void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, Block & block, size_t result);
+[[noreturn]] void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, Block & block, size_t result);


 enum class ConvertFromStringExceptionMode
--- a/dbms/src/Functions/GatherUtils/Algorithms.h
+++ b/dbms/src/Functions/GatherUtils/Algorithms.h
@ -520,7 +520,7 @@ void resizeDynamicSize(ArraySource && array_source, ValueSource && value_source,
    while (!sink.isEnd())
    {
        size_t row_num = array_source.rowNum();
-        bool has_size = !size_null_map || (size_null_map && (*size_null_map)[row_num]);
+        bool has_size = !size_null_map || (*size_null_map)[row_num];

        if (has_size)
        {
--- a/dbms/src/Functions/GatherUtils/Sources.h
+++ b/dbms/src/Functions/GatherUtils/Sources.h
@ -8,12 +8,14 @@
 #include <Columns/ColumnNullable.h>

 #include <Common/typeid_cast.h>
+#include <Common/UTF8Helpers.h>

 #include <Functions/GatherUtils/IArraySource.h>
 #include <Functions/GatherUtils/IValueSource.h>
 #include <Functions/GatherUtils/Slices.h>
 #include <Functions/FunctionHelpers.h>

+
 namespace DB
 {

@ -276,6 +278,92 @@ struct StringSource
 };


+/// Differs from StringSource by measuring 'offset' and 'length' in code points instead of bytes in getSlice* methods.
+/** NOTE: The behaviour of substring and substringUTF8 is inconsistent when the absolute value of a negative offset is greater than the string size:
+  * substring:
+  *      hello
+  * ^-----^ - offset -10, length 7, result: "he"
+  * substringUTF8:
+  *      hello
+  *      ^-----^ - offset -10, length 7, result: "hello"
+  * This may be subject to change.
+  */
+struct UTF8StringSource : public StringSource
+{
+    using StringSource::StringSource;
+
+    static const ColumnString::Char * skipCodePointsForward(const ColumnString::Char * pos, size_t size, const ColumnString::Char * end)
+    {
+        for (size_t i = 0; i < size && pos < end; ++i)
+            pos += UTF8::seqLength(*pos);   /// NOTE pos may become greater than end. It is Ok due to padding in PaddedPODArray.
+        return pos;
+    }
+
+    static const ColumnString::Char * skipCodePointsBackward(const ColumnString::Char * pos, size_t size, const ColumnString::Char * begin)
+    {
+        for (size_t i = 0; i < size && pos > begin; ++i)
+        {
+            --pos;
+            if (pos == begin)
+                break;
+            UTF8::syncBackward(pos, begin);
+        }
+        return pos;
+    }
+
+    Slice getSliceFromLeft(size_t offset) const
+    {
+        auto begin = &elements[prev_offset];
+        auto end = elements.data() + offsets[row_num] - 1;
+        auto res_begin = skipCodePointsForward(begin, offset, end);
+
+        if (res_begin >= end)
+            return {begin, 0};
+
+        return {res_begin, size_t(end - res_begin)};
+    }
+
+    Slice getSliceFromLeft(size_t offset, size_t length) const
+    {
+        auto begin = &elements[prev_offset];
+        auto end = elements.data() + offsets[row_num] - 1;
+        auto res_begin = skipCodePointsForward(begin, offset, end);
+
+        if (res_begin >= end)
+            return {begin, 0};
+
+        auto res_end = skipCodePointsForward(res_begin, length, end);
+
+        if (res_end >= end)
+            return {res_begin, size_t(end - res_begin)};
+
+        return {res_begin, size_t(res_end - res_begin)};
+    }
+
+    Slice getSliceFromRight(size_t offset) const
+    {
+        auto begin = &elements[prev_offset];
+        auto end = elements.data() + offsets[row_num] - 1;
+        auto res_begin = skipCodePointsBackward(end, offset, begin);
+
+        return {res_begin, size_t(end - res_begin)};
+    }
+
+    Slice getSliceFromRight(size_t offset, size_t length) const
+    {
+        auto begin = &elements[prev_offset];
+        auto end = elements.data() + offsets[row_num] - 1;
+        auto res_begin = skipCodePointsBackward(end, offset, begin);
+        auto res_end = skipCodePointsForward(res_begin, length, end);
+
+        if (res_end >= end)
+            return {res_begin, size_t(end - res_begin)};
+
+        return {res_begin, size_t(res_end - res_begin)};
+    }
+};
+
+
 struct FixedStringSource
 {
    using Slice = NumericArraySlice<UInt8>;
--- a/dbms/src/Functions/arrayIntersect.cpp
+++ b/dbms/src/Functions/arrayIntersect.cpp
@ -432,15 +432,20 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable
                    current_has_nullable = true;
                else
                {
+                    typename Map::mapped_type * value = nullptr;
+
                    if constexpr (is_numeric_column)
-                        ++map[columns[arg]->getElement(i)];
+                        value = &map[columns[arg]->getElement(i)];
                    else if constexpr (std::is_same<ColumnType, ColumnString>::value || std::is_same<ColumnType, ColumnFixedString>::value)
-                        ++map[columns[arg]->getDataAt(i)];
+                        value = &map[columns[arg]->getDataAt(i)];
                    else
                    {
                        const char * data = nullptr;
-                        ++map[columns[arg]->serializeValueIntoArena(i, arena, data)];
+                        value = &map[columns[arg]->serializeValueIntoArena(i, arena, data)];
                    }
+
+                    if (*value == arg)
+                        ++(*value);
                }
            }

--- a/dbms/src/Functions/arrayWithConstant.cpp
+++ b/dbms/src/Functions/arrayWithConstant.cpp
@ -61,7 +61,7 @@ public:
        {
            auto array_size = col_num->getInt(i);

-            if (unlikely(array_size) < 0)
+            if (unlikely(array_size < 0))
                throw Exception("Array size cannot be negative: while executing function " + getName(), ErrorCodes::TOO_LARGE_ARRAY_SIZE);

            offset += array_size;
--- a/dbms/src/Functions/if.cpp
+++ b/dbms/src/Functions/if.cpp
@ -153,7 +153,7 @@ template <typename A, typename B>
 struct NumIfImpl<A, B, NumberTraits::Error>
 {
 private:
-    static void throw_error()
+    [[noreturn]] static void throw_error()
    {
        throw Exception("Internal logic error: invalid types of arguments 2 and 3 of if", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }
--- a/dbms/src/Functions/isValidUTF8.cpp
+++ b/dbms/src/Functions/isValidUTF8.cpp
@ -0,0 +1,330 @@
+#include <DataTypes/DataTypeString.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionStringOrArrayToT.h>
+
+#include <cstring>
+
+#ifdef __SSE4_1__
+#    include <emmintrin.h>
+#    include <smmintrin.h>
+#    include <tmmintrin.h>
+#endif
+
+namespace DB
+{
+/// inspired by https://github.com/cyb70289/utf8/
+struct ValidUTF8Impl
+{
+    /*
+MIT License
+
+Copyright (c) 2019 Yibo Cai
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+    /*
+ * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
+ *
+ * Table 3-7. Well-Formed UTF-8 Byte Sequences
+ *
+ * +--------------------+------------+-------------+------------+-------------+
+ * | Code Points        | First Byte | Second Byte | Third Byte | Fourth Byte |
+ * +--------------------+------------+-------------+------------+-------------+
+ * | U+0000..U+007F     | 00..7F     |             |            |             |
+ * +--------------------+------------+-------------+------------+-------------+
+ * | U+0080..U+07FF     | C2..DF     | 80..BF      |            |             |
+ * +--------------------+------------+-------------+------------+-------------+
+ * | U+0800..U+0FFF     | E0         | A0..BF      | 80..BF     |             |
+ * +--------------------+------------+-------------+------------+-------------+
+ * | U+1000..U+CFFF     | E1..EC     | 80..BF      | 80..BF     |             |
+ * +--------------------+------------+-------------+------------+-------------+
+ * | U+D000..U+D7FF     | ED         | 80..9F      | 80..BF     |             |
+ * +--------------------+------------+-------------+------------+-------------+
+ * | U+E000..U+FFFF     | EE..EF     | 80..BF      | 80..BF     |             |
+ * +--------------------+------------+-------------+------------+-------------+
+ * | U+10000..U+3FFFF   | F0         | 90..BF      | 80..BF     | 80..BF      |
+ * +--------------------+------------+-------------+------------+-------------+
+ * | U+40000..U+FFFFF   | F1..F3     | 80..BF      | 80..BF     | 80..BF      |
+ * +--------------------+------------+-------------+------------+-------------+
+ * | U+100000..U+10FFFF | F4         | 80..8F      | 80..BF     | 80..BF      |
+ * +--------------------+------------+-------------+------------+-------------+
+ */
+
+    static inline UInt8 isValidUTF8Naive(const UInt8 * data, UInt64 len)
+    {
+        while (len)
+        {
+            int bytes;
+            const UInt8 byte1 = data[0];
+            /* 00..7F */
+            if (byte1 <= 0x7F)
+            {
+                bytes = 1;
+            }
+            /* C2..DF, 80..BF */
+            else if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF && static_cast<Int8>(data[1]) <= static_cast<Int8>(0xBF))
+            {
+                bytes = 2;
+            }
+            else if (len >= 3)
+            {
+                const UInt8 byte2 = data[1];
+                bool byte2_ok = static_cast<Int8>(byte2) <= static_cast<Int8>(0xBF);
+                bool byte3_ok = static_cast<Int8>(data[2]) <= static_cast<Int8>(0xBF);
+
+                if (byte2_ok && byte3_ok &&
+                    /* E0, A0..BF, 80..BF */
+                    ((byte1 == 0xE0 && byte2 >= 0xA0) ||
+                     /* E1..EC, 80..BF, 80..BF */
+                     (byte1 >= 0xE1 && byte1 <= 0xEC) ||
+                     /* ED, 80..9F, 80..BF */
+                     (byte1 == 0xED && byte2 <= 0x9F) ||
+                     /* EE..EF, 80..BF, 80..BF */
+                     (byte1 >= 0xEE && byte1 <= 0xEF)))
+                {
+                    bytes = 3;
+                }
+                else if (len >= 4)
+                {
+                    bool byte4_ok = static_cast<Int8>(data[3]) <= static_cast<Int8>(0xBF);
+                    if (byte2_ok && byte3_ok && byte4_ok &&
+                        /* F0, 90..BF, 80..BF, 80..BF */
+                        ((byte1 == 0xF0 && byte2 >= 0x90) ||
+                         /* F1..F3, 80..BF, 80..BF, 80..BF */
+                         (byte1 >= 0xF1 && byte1 <= 0xF3) ||
+                         /* F4, 80..8F, 80..BF, 80..BF */
+                         (byte1 == 0xF4 && byte2 <= 0x8F)))
+                    {
+                        bytes = 4;
+                    }
+                    else
+                    {
+                        return false;
+                    }
+                }
+                else
+                {
+                    return false;
+                }
+            }
+            else
+            {
+                return false;
+            }
+            len -= bytes;
+            data += bytes;
+        }
+        return true;
+    }
+
+#ifndef __SSE4_1__
+    static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { return isValidUTF8Naive(data, len); }
+#else
+    static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len)
+    {
+        /*
+        * Map high nibble of "First Byte" to legal character length minus 1
+        * 0x00 ~ 0xBF --> 0
+        * 0xC0 ~ 0xDF --> 1
+        * 0xE0 ~ 0xEF --> 2
+        * 0xF0 ~ 0xFF --> 3
+        */
+        const __m128i first_len_tbl = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3);
+
+        /* Map "First Byte" to 8-th item of range table (0xC2 ~ 0xF4) */
+        const __m128i first_range_tbl = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8);
+
+        /*
+        * Range table, map range index to min and max values
+        */
+        const __m128i range_min_tbl
+            = _mm_setr_epi8(0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80, 0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F);
+
+        const __m128i range_max_tbl
+            = _mm_setr_epi8(0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F, 0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+
+        /*
+        * Tables for fast handling of four special First Bytes (E0, ED, F0, F4), after
+        * which the Second Byte range is not 80~BF. They contain the "range index adjustment".
+        * +------------+---------------+------------------+----------------+
+        * | First Byte | original range| range adjustment | adjusted range |
+        * +------------+---------------+------------------+----------------+
+        * | E0         | 2             | 2                | 4              |
+        * +------------+---------------+------------------+----------------+
+        * | ED         | 2             | 3                | 5              |
+        * +------------+---------------+------------------+----------------+
+        * | F0         | 3             | 3                | 6              |
+        * +------------+---------------+------------------+----------------+
+        * | F4         | 3             | 4                | 7              |
+        * +------------+---------------+------------------+----------------+
+        */
+
+        /* index1 -> E0, index14 -> ED */
+        const __m128i df_ee_tbl = _mm_setr_epi8(0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0);
+
+        /* index1 -> F0, index5 -> F4 */
+        const __m128i ef_fe_tbl = _mm_setr_epi8(0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+
+        __m128i prev_input = _mm_set1_epi8(0);
+        __m128i prev_first_len = _mm_set1_epi8(0);
+        __m128i error = _mm_set1_epi8(0);
+
+        auto check_packed = [&](__m128i input) noexcept
+        {
+            /* high_nibbles = input >> 4 */
+            const __m128i high_nibbles = _mm_and_si128(_mm_srli_epi16(input, 4), _mm_set1_epi8(0x0F));
+
+            /* first_len = legal character length minus 1 */
+            /* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
+            /* first_len = first_len_tbl[high_nibbles] */
+            __m128i first_len = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
+
+            /* First Byte: set range index to 8 for bytes within 0xC0 ~ 0xFF */
+            /* range = first_range_tbl[high_nibbles] */
+            __m128i range = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
+
+            /* Second Byte: set range index to first_len */
+            /* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
+            /* range |= (first_len, prev_first_len) << 1 byte */
+            range = _mm_or_si128(range, _mm_alignr_epi8(first_len, prev_first_len, 15));
+
+            /* Third Byte: set range index to saturate_sub(first_len, 1) */
+            /* 0 for 00~7F, 0 for C0~DF, 1 for E0~EF, 2 for F0~FF */
+            __m128i tmp1;
+            __m128i tmp2;
+            /* tmp1 = saturate_sub(first_len, 1) */
+            tmp1 = _mm_subs_epu8(first_len, _mm_set1_epi8(1));
+            /* tmp2 = saturate_sub(prev_first_len, 1) */
+            tmp2 = _mm_subs_epu8(prev_first_len, _mm_set1_epi8(1));
+            /* range |= (tmp1, tmp2) << 2 bytes */
+            range = _mm_or_si128(range, _mm_alignr_epi8(tmp1, tmp2, 14));
+
+            /* Fourth Byte: set range index to saturate_sub(first_len, 2) */
+            /* 0 for 00~7F, 0 for C0~DF, 0 for E0~EF, 1 for F0~FF */
+            /* tmp1 = saturate_sub(first_len, 2) */
+            tmp1 = _mm_subs_epu8(first_len, _mm_set1_epi8(2));
+            /* tmp2 = saturate_sub(prev_first_len, 2) */
+            tmp2 = _mm_subs_epu8(prev_first_len, _mm_set1_epi8(2));
+            /* range |= (tmp1, tmp2) << 3 bytes */
+            range = _mm_or_si128(range, _mm_alignr_epi8(tmp1, tmp2, 13));
+
+            /*
+             * Now we have the following range indices calculated
+             * Correct cases:
+             * - 8 for C0~FF
+             * - 3 for 1st byte after F0~FF
+             * - 2 for 1st byte after E0~EF or 2nd byte after F0~FF
+             * - 1 for 1st byte after C0~DF or 2nd byte after E0~EF or
+             *         3rd byte after F0~FF
+             * - 0 for others
+             * Error cases:
+             *   9,10,11 if non ascii First Byte overlaps
+             *   E.g., F1 80 C2 90 --> 8 3 10 2, where 10 indicates error
+             */
+
+            /* Adjust Second Byte range for special First Bytes(E0,ED,F0,F4) */
+            /* Overlaps lead to index 9~15, which are illegal in range table */
+            __m128i shift1, pos, range2;
+            /* shift1 = (input, prev_input) << 1 byte */
+            shift1 = _mm_alignr_epi8(input, prev_input, 15);
+            pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
+            /*
+             * shift1:  | EF  F0 ... FE | FF  00  ... ...  DE | DF  E0 ... EE |
+             * pos:     | 0   1      15 | 16  17           239| 240 241    255|
+             * pos-240: | 0   0      0  | 0   0            0  | 0   1      15 |
+             * pos+112: | 112 113    127|       >= 128        |     >= 128    |
+             */
+            tmp1 = _mm_subs_epu8(pos, _mm_set1_epi8(240));
+            range2 = _mm_shuffle_epi8(df_ee_tbl, tmp1);
+            tmp2 = _mm_adds_epu8(pos, _mm_set1_epi8(112));
+            range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp2));
+
+            range = _mm_add_epi8(range, range2);
+
+            /* Load min and max values per calculated range index */
+            __m128i minv = _mm_shuffle_epi8(range_min_tbl, range);
+            __m128i maxv = _mm_shuffle_epi8(range_max_tbl, range);
+
+            /* Check value range */
+            error = _mm_or_si128(error, _mm_cmplt_epi8(input, minv));
+            error = _mm_or_si128(error, _mm_cmpgt_epi8(input, maxv));
+
+            prev_input = input;
+            prev_first_len = first_len;
+
+            data += 16;
+            len -= 16;
+        };
+
+        while (len >= 16)
+            check_packed(_mm_loadu_si128(reinterpret_cast<const __m128i *>(data)));
+
+        /// Here 0 <= len <= 15. Read 16 bytes starting at data - 1; this relies on the left padding and the 15 bytes of right padding.
+        /// Then zero the bytes that came from unknown memory past the end of the string and check the remaining tail.
+        alignas(16) char buf[32];
+        _mm_store_si128(reinterpret_cast<__m128i *>(buf), _mm_loadu_si128(reinterpret_cast<const __m128i *>(data - 1)));
+        memset(buf + len + 1, 0, 16);
+        check_packed(_mm_loadu_si128(reinterpret_cast<__m128i *>(buf + 1)));
+
+        /* Reduce error vector, error_reduced = 0xFFFF if error == 0 */
+        return _mm_movemask_epi8(_mm_cmpeq_epi8(error, _mm_set1_epi8(0))) == 0xFFFF;
+    }
+#endif
+
+    static constexpr bool is_fixed_to_constant = false;
+
+    static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res)
+    {
+        size_t size = offsets.size();
+        size_t prev_offset = 0;
+        for (size_t i = 0; i < size; ++i)
+        {
+            res[i] = isValidUTF8(data.data() + prev_offset, offsets[i] - 1 - prev_offset);
+            prev_offset = offsets[i];
+        }
+    }
+
+    static void vector_fixed_to_constant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt8 & /*res*/) {}
+
+    static void vector_fixed_to_vector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt8> & res)
+    {
+        size_t size = data.size() / n;
+        for (size_t i = 0; i < size; ++i)
+            res[i] = isValidUTF8(data.data() + i * n, n);
+    }
+
+    static void array(const ColumnString::Offsets &, PaddedPODArray<UInt8> &)
+    {
+        throw Exception("Cannot apply function isValidUTF8 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+    }
+};
+
+struct NameValidUTF8
+{
+    static constexpr auto name = "isValidUTF8";
+};
+using FunctionValidUTF8 = FunctionStringOrArrayToT<ValidUTF8Impl, NameValidUTF8, UInt8>;
+
+void registerFunctionValidUTF8(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionValidUTF8>();
+}
+
+}
--- a/dbms/src/Functions/queryStringAndFragment.h
+++ b/dbms/src/Functions/queryStringAndFragment.h
@ -17,15 +17,15 @@ struct ExtractQueryStringAndFragment
        res_data = data;
        res_size = 0;

-        Pos pos = data;
-        Pos end = pos + size;
+        Pos end = data + size;
+        Pos pos;

-        if (end != (pos = find_first_symbols<'?'>(pos, end)))
+        if (end != (pos = find_first_symbols<'?'>(data, end)))
        {
            res_data = pos + (without_leading_char ? 1 : 0);
            res_size = end - res_data;
        }
-        else if (end != (pos = find_first_symbols<'#'>(pos, end)))
+        else if (end != (pos = find_first_symbols<'#'>(data, end)))
        {
            res_data = pos;
            res_size = end - res_data;
--- a/dbms/src/Functions/registerFunctionsString.cpp
+++ b/dbms/src/Functions/registerFunctionsString.cpp
@ -9,6 +9,7 @@ void registerFunctionEmpty(FunctionFactory &);
 void registerFunctionNotEmpty(FunctionFactory &);
 void registerFunctionLength(FunctionFactory &);
 void registerFunctionLengthUTF8(FunctionFactory &);
+void registerFunctionValidUTF8(FunctionFactory &);
 void registerFunctionLower(FunctionFactory &);
 void registerFunctionUpper(FunctionFactory &);
 void registerFunctionLowerUTF8(FunctionFactory &);
@ -17,7 +18,6 @@ void registerFunctionReverse(FunctionFactory &);
 void registerFunctionReverseUTF8(FunctionFactory &);
 void registerFunctionsConcat(FunctionFactory &);
 void registerFunctionSubstring(FunctionFactory &);
-void registerFunctionSubstringUTF8(FunctionFactory &);
 void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &);
 void registerFunctionStartsWith(FunctionFactory &);
 void registerFunctionEndsWith(FunctionFactory &);
@ -36,6 +36,7 @@ void registerFunctionsString(FunctionFactory & factory)
    registerFunctionNotEmpty(factory);
    registerFunctionLength(factory);
    registerFunctionLengthUTF8(factory);
+    registerFunctionValidUTF8(factory);
    registerFunctionLower(factory);
    registerFunctionUpper(factory);
    registerFunctionLowerUTF8(factory);
@ -44,7 +45,6 @@ void registerFunctionsString(FunctionFactory & factory)
    registerFunctionReverseUTF8(factory);
    registerFunctionsConcat(factory);
    registerFunctionSubstring(factory);
-    registerFunctionSubstringUTF8(factory);
    registerFunctionAppendTrailingCharIfAbsent(factory);
    registerFunctionStartsWith(factory);
    registerFunctionEndsWith(factory);
--- a/dbms/src/Functions/substring.cpp
+++ b/dbms/src/Functions/substring.cpp
@ -28,10 +28,13 @@ namespace ErrorCodes
 }


+/// If 'is_utf8' - measure offset and length in code points instead of bytes.
+/// UTF8 variant is not available for FixedString arguments.
+template <bool is_utf8>
 class FunctionSubstring : public IFunction
 {
 public:
-    static constexpr auto name = "substring";
+    static constexpr auto name = is_utf8 ? "substringUTF8" : "substring";
    static FunctionPtr create(const Context &)
    {
        return std::make_shared<FunctionSubstring>();
@ -56,7 +59,7 @@ public:
                + toString(number_of_arguments) + ", should be 2 or 3",
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

-        if (!isStringOrFixedString(arguments[0]))
+        if ((is_utf8 && !isString(arguments[0])) || !isStringOrFixedString(arguments[0]))
            throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        if (!isNumber(arguments[1]))
@ -80,7 +83,7 @@ public:
                              Int64 start_value, Int64 length_value, Block & block, size_t result, Source && source,
                              size_t input_rows_count)
    {
-       auto col_res = ColumnString::create();
+        auto col_res = ColumnString::create();

        if (!column_length)
        {
@ -145,30 +148,48 @@ public:
                throw Exception("Third argument provided for function substring could not be negative.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
        }

-        if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
-            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                             length_value, block, result, StringSource(*col), input_rows_count);
-        else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_string.get()))
-            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                             length_value, block, result, FixedStringSource(*col_fixed), input_rows_count);
-        else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
-            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                             length_value, block, result, ConstSource<StringSource>(*col_const), input_rows_count);
-        else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))
-            executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                             length_value, block, result, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);
+        if constexpr (is_utf8)
+        {
+            if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
+                executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
+                                length_value, block, result, UTF8StringSource(*col), input_rows_count);
+            else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
+                executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
+                                length_value, block, result, ConstSource<UTF8StringSource>(*col_const), input_rows_count);
+            else
+                throw Exception(
+                    "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
+                    ErrorCodes::ILLEGAL_COLUMN);
+        }
        else
-            throw Exception(
-                "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
-                ErrorCodes::ILLEGAL_COLUMN);
+        {
+            if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
+                executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
+                                length_value, block, result, StringSource(*col), input_rows_count);
+            else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_string.get()))
+                executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
+                                length_value, block, result, FixedStringSource(*col_fixed), input_rows_count);
+            else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
+                executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
+                                length_value, block, result, ConstSource<StringSource>(*col_const), input_rows_count);
+            else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))
+                executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
+                                length_value, block, result, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);
+            else
+                throw Exception(
+                    "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
+                    ErrorCodes::ILLEGAL_COLUMN);
+        }
    }
 };

 void registerFunctionSubstring(FunctionFactory & factory)
 {
-    factory.registerFunction<FunctionSubstring>(FunctionFactory::CaseInsensitive);
-    factory.registerAlias("substr", FunctionSubstring::name, FunctionFactory::CaseInsensitive);
-    factory.registerAlias("mid", FunctionSubstring::name, FunctionFactory::CaseInsensitive); /// from MySQL dialect
+    factory.registerFunction<FunctionSubstring<false>>(FunctionFactory::CaseInsensitive);
+    factory.registerAlias("substr", "substring", FunctionFactory::CaseInsensitive);
+    factory.registerAlias("mid", "substring", FunctionFactory::CaseInsensitive); /// from MySQL dialect
+
+    factory.registerFunction<FunctionSubstring<true>>(FunctionFactory::CaseSensitive);
 }

 }
--- a/dbms/src/Functions/substringUTF8.cpp
+++ b/dbms/src/Functions/substringUTF8.cpp
@ -1,166 +0,0 @@
-#include <DataTypes/DataTypeString.h>
-#include <Columns/ColumnString.h>
-#include <Core/ColumnNumbers.h>
-#include <Functions/FunctionFactory.h>
-#include <Functions/FunctionHelpers.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int ILLEGAL_COLUMN;
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    extern const int ARGUMENT_OUT_OF_BOUND;
-}
-
-
-/** If the string is encoded in UTF-8, then it selects a substring of code points in it.
-  * Otherwise, the behavior is undefined.
-  */
-struct SubstringUTF8Impl
-{
-    static void vector(const ColumnString::Chars & data,
-        const ColumnString::Offsets & offsets,
-        size_t start,
-        size_t length,
-        ColumnString::Chars & res_data,
-        ColumnString::Offsets & res_offsets)
-    {
-        res_data.reserve(data.size());
-        size_t size = offsets.size();
-        res_offsets.resize(size);
-
-        ColumnString::Offset prev_offset = 0;
-        ColumnString::Offset res_offset = 0;
-        for (size_t i = 0; i < size; ++i)
-        {
-            ColumnString::Offset j = prev_offset;
-            ColumnString::Offset pos = 1;
-            ColumnString::Offset bytes_start = 0;
-            ColumnString::Offset bytes_length = 0;
-            while (j < offsets[i] - 1)
-            {
-                if (pos == start)
-                    bytes_start = j - prev_offset + 1;
-
-                if (data[j] < 0xBF)
-                    j += 1;
-                else if (data[j] < 0xE0)
-                    j += 2;
-                else if (data[j] < 0xF0)
-                    j += 3;
-                else
-                    j += 1;
-
-                if (pos >= start && pos < start + length)
-                    bytes_length = j - prev_offset + 1 - bytes_start;
-                else if (pos >= start + length)
-                    break;
-
-                ++pos;
-            }
-
-            if (bytes_start == 0)
-            {
-                res_data.resize(res_data.size() + 1);
-                res_data[res_offset] = 0;
-                ++res_offset;
-            }
-            else
-            {
-                size_t bytes_to_copy = std::min(offsets[i] - prev_offset - bytes_start, bytes_length);
-                res_data.resize(res_data.size() + bytes_to_copy + 1);
-                memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &data[prev_offset + bytes_start - 1], bytes_to_copy);
-                res_offset += bytes_to_copy + 1;
-                res_data[res_offset - 1] = 0;
-            }
-            res_offsets[i] = res_offset;
-            prev_offset = offsets[i];
-        }
-    }
-};
-
-
-class FunctionSubstringUTF8 : public IFunction
-{
-public:
-    static constexpr auto name = "substringUTF8";
-    static FunctionPtr create(const Context &)
-    {
-        return std::make_shared<FunctionSubstringUTF8>();
-    }
-
-    String getName() const override
-    {
-        return name;
-    }
-
-    size_t getNumberOfArguments() const override
-    {
-        return 3;
-    }
-
-    bool useDefaultImplementationForConstants() const override { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
-
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
-    {
-        if (!isString(arguments[0]))
-            throw Exception(
-                "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-
-        if (!isNumber(arguments[1]) || !isNumber(arguments[2]))
-            throw Exception("Illegal type " + (isNumber(arguments[1]) ? arguments[2]->getName() : arguments[1]->getName())
-                    + " of argument of function "
-                    + getName(),
-                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-
-        return std::make_shared<DataTypeString>();
-    }
-
-    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
-    {
-        const ColumnPtr column_string = block.getByPosition(arguments[0]).column;
-        const ColumnPtr column_start = block.getByPosition(arguments[1]).column;
-        const ColumnPtr column_length = block.getByPosition(arguments[2]).column;
-
-        if (!column_start->isColumnConst() || !column_length->isColumnConst())
-            throw Exception("2nd and 3rd arguments of function " + getName() + " must be constants.", ErrorCodes::ILLEGAL_COLUMN);
-
-        Field start_field = (*block.getByPosition(arguments[1]).column)[0];
-        Field length_field = (*block.getByPosition(arguments[2]).column)[0];
-
-        if (start_field.getType() != Field::Types::UInt64 || length_field.getType() != Field::Types::UInt64)
-            throw Exception("2nd and 3rd arguments of function " + getName() + " must be non-negative and must have UInt type.", ErrorCodes::ILLEGAL_COLUMN);
-
-        UInt64 start = start_field.get<UInt64>();
-        UInt64 length = length_field.get<UInt64>();
-
-        if (start == 0)
-            throw Exception("Second argument of function substring must be greater than 0.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
-
-        /// Otherwise may lead to overflow and pass bounds check inside inner loop.
-        if (start >= 0x8000000000000000ULL || length >= 0x8000000000000000ULL)
-            throw Exception("Too large values of 2nd or 3rd argument provided for function substring.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
-
-        if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
-        {
-            auto col_res = ColumnString::create();
-            SubstringUTF8Impl::vector(col->getChars(), col->getOffsets(), start, length, col_res->getChars(), col_res->getOffsets());
-            block.getByPosition(result).column = std::move(col_res);
-        }
-        else
-            throw Exception(
-                "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
-                ErrorCodes::ILLEGAL_COLUMN);
-    }
-};
-
-void registerFunctionSubstringUTF8(FunctionFactory & factory)
-{
-    factory.registerFunction<FunctionSubstringUTF8>();
-}
-
-}
--- a/dbms/src/IO/BrotliReadBuffer.cpp
+++ b/dbms/src/IO/BrotliReadBuffer.cpp
@ -7,6 +7,12 @@
 namespace DB
 {

+namespace ErrorCodes
+{
+    extern const int BROTLI_READ_FAILED;
+}
+
+
 class BrotliReadBuffer::BrotliStateWrapper
 {
 public:
@ -29,7 +35,7 @@ public:
 BrotliReadBuffer::BrotliReadBuffer(ReadBuffer &in_, size_t buf_size, char *existing_memory, size_t alignment)
        : BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
        , in(in_)
-        , brotli(new BrotliStateWrapper())
+        , brotli(std::make_unique<BrotliStateWrapper>())
        , in_available(0)
        , in_data(nullptr)
        , out_capacity(0)
@ -56,7 +62,7 @@ bool BrotliReadBuffer::nextImpl()

    if (brotli->result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && (!in_available || in.eof()))
    {
-        throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA);
+        throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED);
    }

    out_capacity = internal_buffer.size();
@ -76,13 +82,13 @@ bool BrotliReadBuffer::nextImpl()
        }
        else
        {
-            throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA);
+            throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED);
        }
    }

    if (brotli->result == BROTLI_DECODER_RESULT_ERROR)
    {
-        throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA);
+        throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED);
    }

    return true;
--- a/dbms/src/IO/BrotliReadBuffer.h
+++ b/dbms/src/IO/BrotliReadBuffer.h
@ -34,5 +34,6 @@ private:

    bool eof;
 };
+
 }

--- a/dbms/src/IO/BrotliWriteBuffer.cpp
+++ b/dbms/src/IO/BrotliWriteBuffer.cpp
@ -0,0 +1,126 @@
+#include <Common/config.h>
+#if USE_BROTLI
+
+#include <IO/BrotliWriteBuffer.h>
+#include <brotli/encode.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BROTLI_WRITE_FAILED;
+}
+
+
+/// Owns the Brotli encoder instance of one compressed stream and destroys it together with the wrapper.
+class BrotliWriteBuffer::BrotliStateWrapper
+{
+public:
+    /// Creates an encoder; null allocator arguments make Brotli use its default memory allocators.
+    /// Throws Exception(BROTLI_WRITE_FAILED) if the encoder cannot be allocated.
+    BrotliStateWrapper()
+    : state(BrotliEncoderCreateInstance(nullptr, nullptr, nullptr))
+    {
+        /// BrotliEncoderCreateInstance reports allocation failure by returning a null pointer.
+        /// Fail here instead of handing a null encoder to BrotliEncoderSetParameter / BrotliEncoderCompressStream.
+        if (!state)
+            throw Exception("brotli encoder state allocation failed", ErrorCodes::BROTLI_WRITE_FAILED);
+    }
+
+    ~BrotliStateWrapper()
+    {
+        BrotliEncoderDestroyInstance(state);
+    }
+
+    /// The wrapper is the only owner of `state`; a copy would destroy the same encoder twice.
+    BrotliStateWrapper(const BrotliStateWrapper &) = delete;
+    BrotliStateWrapper & operator=(const BrotliStateWrapper &) = delete;
+
+    BrotliEncoderState * state;
+};
+
+/// Creates the encoder and configures it before any data is compressed.
+/// `compression_level` is handed to the encoder as BROTLI_PARAM_QUALITY;
+/// `buf_size`, `existing_memory` and `alignment` are forwarded to BufferWithOwnMemory.
+BrotliWriteBuffer::BrotliWriteBuffer(WriteBuffer & out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment)
+        : BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment)
+        , brotli(std::make_unique<BrotliStateWrapper>())
+        , in_available(0)
+        , in_data(nullptr)
+        , out_capacity(0)
+        , out_data(nullptr)
+        , out(out_)
+{
+    /// LZ77 window size (log2). 24 is the default used by the brotli command line tool
+    /// according to brotli sources (c/tools/brotli.c:81).
+    constexpr uint32_t lz77_window_bits = 24;
+    const auto quality = static_cast<uint32_t>(compression_level);
+
+    BrotliEncoderSetParameter(brotli->state, BROTLI_PARAM_QUALITY, quality);
+    BrotliEncoderSetParameter(brotli->state, BROTLI_PARAM_LGWIN, lz77_window_bits);
+}
+
+/// Completes the compressed stream if finish() has not been called yet.
+/// finish() can throw, and an exception escaping a destructor terminates the program,
+/// so a failure here is logged instead of propagated. Call finish() explicitly to observe errors.
+BrotliWriteBuffer::~BrotliWriteBuffer()
+{
+    try
+    {
+        finish();
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+    }
+}
+
+/// Feeds the bytes accumulated in the working buffer to the encoder and appends the produced
+/// compressed bytes to `out`. The stream is NOT finalized here: the encoder may keep part of
+/// its output internally until a later call or until finish().
+/// Throws Exception(BROTLI_WRITE_FAILED) if the encoder reports an error.
+void BrotliWriteBuffer::nextImpl()
+{
+    if (!offset())
+    {
+        return;
+    }
+
+    in_data = reinterpret_cast<unsigned char *>(working_buffer.begin());
+    in_available = offset();
+
+    do
+    {
+        /// Make sure there is free space in the destination before every encoder call.
+        out.nextIfAtEnd();
+        out_data = reinterpret_cast<unsigned char *>(out.position());
+        out_capacity = out.buffer().end() - out.position();
+
+        /// Always use PROCESS here. Requesting FINISH once the input is consumed (e.g. when the
+        /// output buffer happened to be filled completely) would close the stream, and every
+        /// subsequent write with new input would be rejected by the encoder.
+        int result = BrotliEncoderCompressStream(
+                brotli->state,
+                BROTLI_OPERATION_PROCESS,
+                &in_available,
+                &in_data,
+                &out_capacity,
+                &out_data,
+                nullptr);
+
+        /// The encoder advanced out_data; publish the written bytes to `out`.
+        out.position() = out.buffer().end() - out_capacity;
+
+        if (result == 0)
+        {
+            throw Exception("brotli compress failed", ErrorCodes::BROTLI_WRITE_FAILED);
+        }
+    }
+    /// Output still pending inside the encoder is drained by the next call or by finish().
+    while (in_available > 0);
+}
+
+/// Compresses whatever is still buffered and then drives the encoder with BROTLI_OPERATION_FINISH
+/// until it reports the stream as finished. Does nothing if the stream was already finished.
+/// Throws Exception(BROTLI_WRITE_FAILED) if the encoder fails before the stream is complete.
+void BrotliWriteBuffer::finish()
+{
+    if (finished)
+        return;
+
+    next();
+
+    bool stream_complete = false;
+    while (!stream_complete)
+    {
+        /// Offer the free tail of the destination buffer to the encoder.
+        out.nextIfAtEnd();
+        out_data = reinterpret_cast<unsigned char *>(out.position());
+        out_capacity = out.buffer().end() - out.position();
+
+        const int status = BrotliEncoderCompressStream(
+                brotli->state,
+                BROTLI_OPERATION_FINISH,
+                &in_available,
+                &in_data,
+                &out_capacity,
+                &out_data,
+                nullptr);
+
+        out.position() = out.buffer().end() - out_capacity;
+
+        /// Completion is checked first: an error status only matters while the stream is still open.
+        stream_complete = BrotliEncoderIsFinished(brotli->state);
+        if (!stream_complete && status == 0)
+        {
+            throw Exception("brotli compress failed", ErrorCodes::BROTLI_WRITE_FAILED);
+        }
+    }
+
+    finished = true;
+}
+
+}
+
+#endif
--- a/dbms/src/IO/BrotliWriteBuffer.h
+++ b/dbms/src/IO/BrotliWriteBuffer.h
@ -0,0 +1,40 @@
+#pragma once
+
+#include <IO/WriteBuffer.h>
+#include <IO/BufferWithOwnMemory.h>
+
+namespace DB
+{
+
+/// Write buffer that compresses data with Brotli and writes the compressed bytes into another WriteBuffer.
+class BrotliWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
+{
+public:
+    /// out_ - buffer that receives the compressed stream; stored by reference.
+    /// compression_level - passed to the encoder as its quality parameter.
+    /// buf_size, existing_memory, alignment - forwarded to BufferWithOwnMemory.
+    BrotliWriteBuffer(
+            WriteBuffer & out_,
+            int compression_level,
+            size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
+            char * existing_memory = nullptr,
+            size_t alignment = 0);
+
+    /// Calls finish().
+    ~BrotliWriteBuffer() override;
+
+    /// Compresses any buffered data and completes the Brotli stream.
+    /// Calling it again after it has succeeded does nothing.
+    void finish();
+
+private:
+    /// Compresses the contents of the working buffer into `out`.
+    void nextImpl() override;
+
+    /// Holder of the Brotli encoder state; defined in the .cpp file.
+    class BrotliStateWrapper;
+    std::unique_ptr<BrotliStateWrapper> brotli;
+
+    /// Uncompressed input handed to the encoder: remaining byte count and current read position.
+    size_t in_available;
+    const uint8_t * in_data;
+
+    /// Free space of `out` offered to the encoder: remaining byte count and current write position.
+    size_t out_capacity;
+    uint8_t  * out_data;
+
+    /// Destination for the compressed data.
+    WriteBuffer & out;
+
+    /// Set once finish() has completed the stream.
+    bool finished = false;
+};
+
+}
--- a/dbms/src/IO/BufferWithOwnMemory.h
+++ b/dbms/src/IO/BufferWithOwnMemory.h
@ -24,7 +24,8 @@ namespace DB
  * Differs in that is doesn't do unneeded memset. (And also tries to do as little as possible.)
  * Also allows to allocate aligned piece of memory (to use with O_DIRECT, for example).
  */
-struct Memory : boost::noncopyable, Allocator<false>
+template <typename Allocator = Allocator<false>>
+struct Memory : boost::noncopyable, Allocator
 {
    /// Padding is needed to allow usage of 'memcpySmallAllowReadWriteOverflow15' function with this buffer.
    static constexpr size_t pad_right = 15;
@ -136,7 +137,7 @@ template <typename Base>
 class BufferWithOwnMemory : public Base
 {
 protected:
-    Memory memory;
+    Memory<> memory;
 public:
    /// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership.
    BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
--- a/dbms/src/IO/ZlibCompressionMethod.h
+++ b/dbms/src/IO/ZlibCompressionMethod.h
@ -3,7 +3,7 @@
 namespace DB
 {

-enum class ZlibCompressionMethod
+enum class CompressionMethod
 {
    /// DEFLATE compression with gzip header and CRC32 checksum.
    /// This option corresponds to files produced by gzip(1) or HTTP Content-Encoding: gzip.
@ -11,6 +11,7 @@ enum class ZlibCompressionMethod
    /// DEFLATE compression with zlib header and Adler32 checksum.
    /// This option corresponds to HTTP Content-Encoding: deflate.
    Zlib,
+    Brotli,
 };

 }
--- a/dbms/src/IO/ReadBuffer.h
+++ b/dbms/src/IO/ReadBuffer.h
@ -179,7 +179,7 @@ private:
      */
    virtual bool nextImpl() { return false; }

-    void throwReadAfterEOF()
+    [[noreturn]] void throwReadAfterEOF()
    {
        throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
    }
--- a/dbms/src/IO/ReadBufferAIO.cpp
+++ b/dbms/src/IO/ReadBufferAIO.cpp
@ -187,6 +187,9 @@ off_t ReadBufferAIO::doSeek(off_t off, int whence)
            pos = working_buffer.end();
            first_unread_pos_in_file = new_pos_in_file;

+            /// If we seek backwards, then we are no longer at EOF.
+            is_eof = false;
+
            /// We can not use the result of the current asynchronous request.
            skip();
        }
--- a/dbms/src/IO/ReadBufferFromFileBase.h
+++ b/dbms/src/IO/ReadBufferFromFileBase.h
@ -43,6 +43,7 @@ protected:
    ProfileCallback profile_callback;
    clockid_t clock_type;

+    /// Children implementation should be able to seek backwards
    virtual off_t doSeek(off_t off, int whence) = 0;
 };

--- a/dbms/src/IO/ReadHelpers.h
+++ b/dbms/src/IO/ReadHelpers.h
@ -164,7 +164,7 @@ void readVectorBinary(std::vector<T> & v, ReadBuffer & buf, size_t MAX_VECTOR_SI
 void assertString(const char * s, ReadBuffer & buf);
 void assertEOF(ReadBuffer & buf);

-void throwAtAssertionFailed(const char * s, ReadBuffer & buf);
+[[noreturn]] void throwAtAssertionFailed(const char * s, ReadBuffer & buf);

 inline void assertChar(char symbol, ReadBuffer & buf)
 {
--- a/dbms/src/IO/UncompressedCache.h
+++ b/dbms/src/IO/UncompressedCache.h
@ -6,6 +6,11 @@
 #include <Common/ProfileEvents.h>
 #include <IO/BufferWithOwnMemory.h>

+#include <Common/config.h>
+#if USE_LFALLOC
+#include <Common/LFAllocator.h>
+#endif
+

 namespace ProfileEvents
 {
@ -20,8 +25,13 @@ namespace DB

 struct UncompressedCacheCell
 {
-    Memory data;
+#if USE_LFALLOC
+    Memory<LFAllocator> data;
+#else
+    Memory<> data;
+#endif
    size_t compressed_size;
+    UInt32 additional_bytes;
 };

 struct UncompressedSizeWeightFunction
--- a/dbms/src/IO/VarInt.h
+++ b/dbms/src/IO/VarInt.h
@ -113,7 +113,7 @@ readVarUInt(T & x, ReadBuffer & istr)
 }


-inline void throwReadAfterEOF()
+[[noreturn]] inline void throwReadAfterEOF()
 {
    throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
 }
--- a/dbms/src/IO/WriteBufferFromHTTPServerResponse.cpp
+++ b/dbms/src/IO/WriteBufferFromHTTPServerResponse.cpp
@ -76,34 +76,47 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
        {
            if (compress)
            {
-                if (compression_method == ZlibCompressionMethod::Gzip)
+                if (compression_method == CompressionMethod::Gzip)
                {
 #if defined(POCO_CLICKHOUSE_PATCH)
                    *response_header_ostr << "Content-Encoding: gzip\r\n";
 #else
                    response.set("Content-Encoding", "gzip");
+                    response_body_ostr = &(response.send());
 #endif
+                    out_raw.emplace(*response_body_ostr);
+                    deflating_buf.emplace(*out_raw, compression_method, compression_level, working_buffer.size(), working_buffer.begin());
+                    out = &*deflating_buf;
                }
-                else if (compression_method == ZlibCompressionMethod::Zlib)
+                else if (compression_method == CompressionMethod::Zlib)
                {
 #if defined(POCO_CLICKHOUSE_PATCH)
                    *response_header_ostr << "Content-Encoding: deflate\r\n";
 #else
                    response.set("Content-Encoding", "deflate");
+                    response_body_ostr = &(response.send());
 #endif
+                    out_raw.emplace(*response_body_ostr);
+                    deflating_buf.emplace(*out_raw, compression_method, compression_level, working_buffer.size(), working_buffer.begin());
+                    out = &*deflating_buf;
                }
+                else if (compression_method == CompressionMethod::Brotli)
+                {
+#if defined(POCO_CLICKHOUSE_PATCH)
+                    *response_header_ostr << "Content-Encoding: br\r\n";
+#else
+                    response.set("Content-Encoding", "br");
+                    response_body_ostr = &(response.send());
+#endif
+                    out_raw.emplace(*response_body_ostr);
+                    brotli_buf.emplace(*out_raw, compression_level, working_buffer.size(), working_buffer.begin());
+                    out = &*brotli_buf;
+                }
+
                else
                    throw Exception("Logical error: unknown compression method passed to WriteBufferFromHTTPServerResponse",
                                    ErrorCodes::LOGICAL_ERROR);
                /// Use memory allocated for the outer buffer in the buffer pointed to by out. This avoids extra allocation and copy.
-
-#if !defined(POCO_CLICKHOUSE_PATCH)
-                response_body_ostr = &(response.send());
-#endif
-
-                out_raw.emplace(*response_body_ostr);
-                deflating_buf.emplace(*out_raw, compression_method, compression_level, working_buffer.size(), working_buffer.begin());
-                out = &*deflating_buf;
            }
            else
            {
@ -133,7 +146,7 @@ WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse(
    Poco::Net::HTTPServerResponse & response_,
    unsigned keep_alive_timeout_,
    bool compress_,
-    ZlibCompressionMethod compression_method_,
+    CompressionMethod compression_method_,
    size_t size)
    : BufferWithOwnMemory<WriteBuffer>(size)
    , request(request_)
--- a/dbms/src/IO/WriteBufferFromHTTPServerResponse.h
+++ b/dbms/src/IO/WriteBufferFromHTTPServerResponse.h
@ -9,6 +9,7 @@
 #include <IO/BufferWithOwnMemory.h>
 #include <IO/WriteBufferFromOStream.h>
 #include <IO/ZlibDeflatingWriteBuffer.h>
+#include <IO/BrotliWriteBuffer.h>
 #include <IO/HTTPCommon.h>
 #include <IO/Progress.h>
 #include <Common/NetException.h>
@ -49,7 +50,7 @@ private:
    bool add_cors_header = false;
    unsigned keep_alive_timeout = 0;
    bool compress = false;
-    ZlibCompressionMethod compression_method;
+    CompressionMethod compression_method;
    int compression_level = Z_DEFAULT_COMPRESSION;

    std::ostream * response_body_ostr = nullptr;
@ -60,6 +61,7 @@ private:

    std::optional<WriteBufferFromOStream> out_raw;
    std::optional<ZlibDeflatingWriteBuffer> deflating_buf;
+    std::optional<BrotliWriteBuffer> brotli_buf;

    WriteBuffer * out = nullptr;     /// Uncompressed HTTP body is written to this buffer. Points to out_raw or possibly to deflating_buf.

@ -89,7 +91,7 @@ public:
        Poco::Net::HTTPServerResponse & response_,
        unsigned keep_alive_timeout_,
        bool compress_ = false,        /// If true - set Content-Encoding header and compress the result.
-        ZlibCompressionMethod compression_method_ = ZlibCompressionMethod::Gzip,
+        CompressionMethod compression_method_ = CompressionMethod::Gzip,
        size_t size = DBMS_DEFAULT_BUFFER_SIZE);

    /// Writes progess in repeating HTTP headers.
--- a/dbms/src/IO/ZlibDeflatingWriteBuffer.cpp
+++ b/dbms/src/IO/ZlibDeflatingWriteBuffer.cpp
@ -6,7 +6,7 @@ namespace DB

 ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer(
        WriteBuffer & out_,
-        ZlibCompressionMethod compression_method,
+        CompressionMethod compression_method,
        int compression_level,
        size_t buf_size,
        char * existing_memory,
@ -23,7 +23,7 @@ ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer(
    zstr.avail_out = 0;

    int window_bits = 15;
-    if (compression_method == ZlibCompressionMethod::Gzip)
+    if (compression_method == CompressionMethod::Gzip)
    {
        window_bits += 16;
    }
--- a/dbms/src/IO/ZlibDeflatingWriteBuffer.h
+++ b/dbms/src/IO/ZlibDeflatingWriteBuffer.h
@ -2,7 +2,7 @@

 #include <IO/WriteBuffer.h>
 #include <IO/BufferWithOwnMemory.h>
-#include <IO/ZlibCompressionMethod.h>
+#include <IO/CompressionMethod.h>

 #include <zlib.h>

@ -21,7 +21,7 @@ class ZlibDeflatingWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
 public:
    ZlibDeflatingWriteBuffer(
            WriteBuffer & out_,
-            ZlibCompressionMethod compression_method,
+            CompressionMethod compression_method,
            int compression_level,
            size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
            char * existing_memory = nullptr,
--- a/dbms/src/IO/ZlibInflatingReadBuffer.cpp
+++ b/dbms/src/IO/ZlibInflatingReadBuffer.cpp
@ -6,7 +6,7 @@ namespace DB

 ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
        ReadBuffer & in_,
-        ZlibCompressionMethod compression_method,
+        CompressionMethod compression_method,
        size_t buf_size,
        char * existing_memory,
        size_t alignment)
@ -23,7 +23,7 @@ ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
    zstr.avail_out = 0;

    int window_bits = 15;
-    if (compression_method == ZlibCompressionMethod::Gzip)
+    if (compression_method == CompressionMethod::Gzip)
    {
        window_bits += 16;
    }
--- a/dbms/src/IO/ZlibInflatingReadBuffer.h
+++ b/dbms/src/IO/ZlibInflatingReadBuffer.h
@ -2,7 +2,7 @@

 #include <IO/ReadBuffer.h>
 #include <IO/BufferWithOwnMemory.h>
-#include <IO/ZlibCompressionMethod.h>
+#include <IO/CompressionMethod.h>

 #include <zlib.h>

@ -22,7 +22,7 @@ class ZlibInflatingReadBuffer : public BufferWithOwnMemory<ReadBuffer>
 public:
    ZlibInflatingReadBuffer(
            ReadBuffer & in_,
-            ZlibCompressionMethod compression_method,
+            CompressionMethod compression_method,
            size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
            char * existing_memory = nullptr,
            size_t alignment = 0);
--- a/dbms/src/IO/tests/gtest_aio_seek_back_after_eof.cpp
+++ b/dbms/src/IO/tests/gtest_aio_seek_back_after_eof.cpp
@ -0,0 +1,71 @@
+#pragma GCC diagnostic ignored "-Wsign-compare"
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#pragma clang diagnostic ignored "-Wundef"
+#endif
+#include <gtest/gtest.h>
+
+#include <Core/Defines.h>
+#include <port/unistd.h>
+#include <IO/ReadBufferAIO.h>
+#include <fstream>
+#include <stdexcept>
+#include <string>
+
+namespace
+{
+/// Creates a fresh temporary directory from the template /tmp/fileXXXXXX and returns the path
+/// "<dir>/foo" inside it. The file itself is not created here.
+/// Throws std::runtime_error if the directory cannot be created.
+std::string createTmpFileForEOFtest()
+{
+    char pattern[] = "/tmp/fileXXXXXX";
+    char * dir = ::mkdtemp(pattern);
+    /// mkdtemp returns a null pointer on failure; constructing std::string from it is undefined behaviour.
+    if (!dir)
+        throw std::runtime_error("Cannot create temporary directory for ReadBufferAIO EOF test");
+    return std::string(dir) + "/foo";
+}
+
+/// Prepares the fixture file for the EOF test:
+/// stores a new temporary file path in `filename`, appends 10 * DEFAULT_AIO_FILE_BLOCK_SIZE
+/// characters of a repeating A-Z0-9 pattern to `buf` and writes `buf` to that file.
+void prepare_for_eof(std::string & filename, std::string & buf)
+{
+    static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+    const size_t total_size = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE;
+
+    filename = createTmpFileForEOFtest();
+
+    buf.reserve(total_size);
+    for (size_t pos = 0; pos != total_size; ++pos)
+        buf.push_back(symbols[pos % symbols.size()]);
+
+    /// The stream is flushed and closed when `file` goes out of scope.
+    std::ofstream file(filename);
+    file << buf;
+}
+
+
+}
+/// Checks that ReadBufferAIO can be read again after it has reached EOF and was then seeked backwards.
+TEST(ReadBufferAIOTest, TestReadAfterAIO)
+{
+    using namespace DB;
+
+    std::string file_path;
+    std::string data;
+    prepare_for_eof(file_path, data);
+    const size_t data_size = data.length();
+
+    ReadBufferAIO testbuf(file_path);
+
+    /// First pass: consume the whole file, which must leave the buffer at EOF.
+    std::string newdata(data_size, '\0');
+    size_t total_read = testbuf.read(newdata.data(), newdata.length());
+    EXPECT_EQ(total_read, data_size);
+    EXPECT_TRUE(testbuf.eof());
+
+    /// Step back 100 bytes from the end and read the tail once more.
+    testbuf.seek(data_size - 100);
+
+    std::string smalldata(100, '\0');
+    size_t read_after_eof = testbuf.read(smalldata.data(), smalldata.size());
+    EXPECT_EQ(read_after_eof, 100);
+    EXPECT_TRUE(testbuf.eof());
+
+    /// Rewind to the very beginning and read the whole file again.
+    testbuf.seek(0);
+
+    std::string repeatdata(data_size, '\0');
+    size_t read_after_eof_big = testbuf.read(repeatdata.data(), repeatdata.size());
+    EXPECT_EQ(read_after_eof_big, data_size);
+    EXPECT_TRUE(testbuf.eof());
+}
--- a/dbms/src/IO/tests/zlib_buffers.cpp
+++ b/dbms/src/IO/tests/zlib_buffers.cpp
@ -23,7 +23,7 @@ try

    {
        DB::WriteBufferFromFile buf("test_zlib_buffers.gz", DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_CREAT | O_TRUNC);
-        DB::ZlibDeflatingWriteBuffer deflating_buf(buf, DB::ZlibCompressionMethod::Gzip, /* compression_level = */ 3);
+        DB::ZlibDeflatingWriteBuffer deflating_buf(buf, DB::CompressionMethod::Gzip, /* compression_level = */ 3);

        stopwatch.restart();
        for (size_t i = 0; i < n; ++i)
@ -41,7 +41,7 @@ try

    {
        DB::ReadBufferFromFile buf("test_zlib_buffers.gz");
-        DB::ZlibInflatingReadBuffer inflating_buf(buf, DB::ZlibCompressionMethod::Gzip);
+        DB::ZlibInflatingReadBuffer inflating_buf(buf, DB::CompressionMethod::Gzip);

        stopwatch.restart();
        for (size_t i = 0; i < n; ++i)
--- a/dbms/src/Interpreters/AddDefaultDatabaseVisitor.h
+++ b/dbms/src/Interpreters/AddDefaultDatabaseVisitor.h
@ -71,8 +71,8 @@ private:

    void visit(ASTSelectQuery & select, ASTPtr &) const
    {
-        if (select.tables)
-            tryVisit<ASTTablesInSelectQuery>(select.tables);
+        if (select.tables())
+            tryVisit<ASTTablesInSelectQuery>(select.refTables());

        visitChildren(select);
    }
--- a/Show More
+++ b/Show More