Merge branch 'master' into fix-fpe-datetime64

2024-11-21 23:21:59 +00:00 · 2020-06-23 22:45:45 +03:00 · 2020-06-23 22:45:45 +03:00 · cda2687195
commit cda2687195
parent c51c2656eb 55eee9bfa1
336 changed files with 5499 additions and 2534 deletions
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@ -85,7 +85,8 @@ static const size_t signal_pipe_buf_size =
    + sizeof(ucontext_t)
    + sizeof(StackTrace)
    + sizeof(UInt32)
-    + max_query_id_size + 1;    /// query_id + varint encoded length
+    + max_query_id_size + 1    /// query_id + varint encoded length
+    + sizeof(void*);


 using signal_function = void(int, siginfo_t*, void*);
@ -135,6 +136,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
    DB::writePODBinary(stack_trace, out);
    DB::writeBinary(UInt32(getThreadId()), out);
    DB::writeStringBinary(query_id, out);
+    DB::writePODBinary(DB::current_thread, out);

    out.next();

@ -218,16 +220,18 @@ public:
                StackTrace stack_trace(NoCapture{});
                UInt32 thread_num;
                std::string query_id;
+                DB::ThreadStatus * thread_ptr{};

                DB::readPODBinary(info, in);
                DB::readPODBinary(context, in);
                DB::readPODBinary(stack_trace, in);
                DB::readBinary(thread_num, in);
                DB::readBinary(query_id, in);
+                DB::readPODBinary(thread_ptr, in);

                /// This allows to receive more signals if failure happens inside onFault function.
                /// Example: segfault while symbolizing stack trace.
-                std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id); }).detach();
+                std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id, thread_ptr); }).detach();
            }
        }
    }
@ -248,8 +252,19 @@ private:
        const ucontext_t & context,
        const StackTrace & stack_trace,
        UInt32 thread_num,
-        const std::string & query_id) const
+        const std::string & query_id,
+        DB::ThreadStatus * thread_ptr) const
    {
+        DB::ThreadStatus thread_status;
+
+        /// Send logs from this thread to client if possible.
+        /// It will allow client to see failure messages directly.
+        if (thread_ptr)
+        {
+            if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
+                DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace);
+        }
+
        LOG_FATAL(log, "########################################");

        if (query_id.empty())
@ -280,6 +295,10 @@ private:

        /// Write symbolized stack trace line by line for better grep-ability.
        stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });
+
+        /// When everything is done, we will try to send these error messages to client.
+        if (thread_ptr)
+            thread_ptr->onFatalError();
    }
 };

--- a/contrib/jemalloc-cmake/CMakeLists.txt
+++ b/contrib/jemalloc-cmake/CMakeLists.txt
@ -1,23 +1,31 @@
 option (ENABLE_JEMALLOC "Enable jemalloc allocator" ${ENABLE_LIBRARIES})

-if (SANITIZE OR NOT OS_LINUX OR NOT (ARCH_AMD64 OR ARCH_ARM))
+if (SANITIZE OR NOT (ARCH_AMD64 OR ARCH_ARM) OR NOT (OS_LINUX OR OS_FREEBSD))
    set (ENABLE_JEMALLOC OFF)
-    message (STATUS "jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used on Linux with x86_64 or aarch64.")
+    message (STATUS "jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64 or aarch64 on linux or freebsd.")
 endif ()

 if (ENABLE_JEMALLOC)
+    if (NOT OS_LINUX)
+        message (WARNING "jemalloc support on non-linux is EXPERIMENTAL")
+    endif()
+
    option (USE_INTERNAL_JEMALLOC "Use internal jemalloc library" ${NOT_UNBUNDLED})

    if (USE_INTERNAL_JEMALLOC)
-        # ThreadPool select job randomly, and there can be some threads that had been
-        # performed some memory heavy task before and will be inactive for some time,
-        # but until it will became active again, the memory will not be freed since by
-        # default each thread has it's own arena, but there should be not more then
-        # 4*CPU arenas (see opt.nareans description).
-        #
-        # By enabling percpu_arena number of arenas limited to number of CPUs and hence
-        # this problem should go away.
-        set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0")
+        if (OS_LINUX)
+            # ThreadPool selects jobs randomly, so there can be threads that performed
+            # some memory-heavy task earlier and will then be inactive for some time;
+            # until such a thread becomes active again, the memory will not be freed, since
+            # by default each thread has its own arena (though there should be no more than
+            # 4*CPU arenas, see the opt.narenas description).
+            #
+            # Enabling percpu_arena limits the number of arenas to the number of CPUs, and
+            # hence this problem should go away.
+            set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0")
+        else()
+            set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0")
+        endif()
        # CACHE variable is empty, to allow changing defaults without necessity
        # to purge cache
        set (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE "" CACHE STRING "Change default configuration string of JEMalloc" )
@ -71,14 +79,26 @@ if (ENABLE_JEMALLOC)
        target_include_directories(jemalloc PRIVATE ${LIBRARY_DIR}/include)
        target_include_directories(jemalloc SYSTEM PUBLIC include)

-        set(JEMALLOC_INCLUDE)
-        if (ARCH_AMD64)
-            set(JEMALLOC_INCLUDE_PREFIX include_linux_x86_64)
-        elseif (ARCH_ARM)
-            set(JEMALLOC_INCLUDE_PREFIX include_linux_aarch64)
+        set (JEMALLOC_INCLUDE_PREFIX)
+        # OS_*: choose the operating-system part of the include directory prefix.
+        if (OS_LINUX)
+            set (JEMALLOC_INCLUDE_PREFIX "include_linux")
+        elseif (OS_FREEBSD)
+            set (JEMALLOC_INCLUDE_PREFIX "include_freebsd")
+        elseif (OS_DARWIN)
+            set (JEMALLOC_INCLUDE_PREFIX "include_darwin")
+        else ()
+            message (FATAL_ERROR "This OS is not supported")
        endif ()
-        target_include_directories(jemalloc SYSTEM PUBLIC
-            ${JEMALLOC_INCLUDE_PREFIX})
+        # ARCH_*: append the architecture suffix to the include directory prefix.
+        if (ARCH_AMD64)
+            set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_x86_64")
+        elseif (ARCH_ARM)
+            set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_aarch64")
+        else ()
+            message (FATAL_ERROR "This arch is not supported")
+        endif ()
+
        configure_file(${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h.in
            ${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h)
        target_include_directories(jemalloc SYSTEM PRIVATE
--- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h
+++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h
--- a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h
+++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h
@ -1,3 +1,13 @@
+// OSX does not have these specifiers on its system alloc functions, so keeping them gives an
+// "exception specification in declaration" error. The same override is applied on FreeBSD.
+#if defined(__APPLE__) || defined(__FreeBSD__)
+# undef JEMALLOC_NOTHROW
+# define JEMALLOC_NOTHROW
+
+# undef JEMALLOC_CXX_THROW
+# define JEMALLOC_CXX_THROW
+#endif
+
 /*
 * The je_ prefix on the following public symbol declarations is an artifact
 * of namespace management, and should be omitted in application code unless
--- a/contrib/jemalloc-cmake/include_darwin_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/contrib/jemalloc-cmake/include_darwin_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in
@ -0,0 +1,372 @@
+/* include/jemalloc/internal/jemalloc_internal_defs.h.  Generated from jemalloc_internal_defs.h.in by configure.  */
+#ifndef JEMALLOC_INTERNAL_DEFS_H_
+#define JEMALLOC_INTERNAL_DEFS_H_
+/*
+ * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
+ * public APIs to be prefixed.  This makes it possible, with some care, to use
+ * multiple allocators simultaneously.
+ */
+#define JEMALLOC_PREFIX "je_"
+#define JEMALLOC_CPREFIX "JE_"
+
+/*
+ * Define overrides for non-standard allocator-related functions if they are
+ * present on the system.
+ */
+/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */
+/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */
+/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */
+/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */
+/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */
+/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */
+/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */
+
+/*
+ * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
+ * For shared libraries, symbol visibility mechanisms prevent these symbols
+ * from being exported, but for static libraries, naming collisions are a real
+ * possibility.
+ */
+#define JEMALLOC_PRIVATE_NAMESPACE je_
+
+/*
+ * Hyper-threaded CPUs may need a special instruction inside spin loops in
+ * order to yield to another virtual CPU.
+ */
+#define CPU_SPINWAIT
+/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
+#define HAVE_CPU_SPINWAIT 0
+
+/*
+ * Number of significant bits in virtual addresses.  This may be less than the
+ * total number of bits in a pointer, e.g. on x64, for which the uppermost 16
+ * bits are the same as bit 47.
+ */
+#define LG_VADDR 48
+
+/* Defined if C11 atomics are available. */
+#define JEMALLOC_C11_ATOMICS 1
+
+/* Defined if GCC __atomic atomics are available. */
+#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
+/* and the 8-bit variant support. */
+#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
+
+/* Defined if GCC __sync atomics are available. */
+#define JEMALLOC_GCC_SYNC_ATOMICS 1
+/* and the 8-bit variant support. */
+#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
+
+/*
+ * Defined if __builtin_clz() and __builtin_clzl() are available.
+ */
+#define JEMALLOC_HAVE_BUILTIN_CLZ
+
+/*
+ * Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
+ */
+#define JEMALLOC_OS_UNFAIR_LOCK
+
+/* Defined if syscall(2) is usable. */
+/* #undef JEMALLOC_USE_SYSCALL */
+
+/*
+ * Defined if secure_getenv(3) is available.
+ */
+/* #undef JEMALLOC_HAVE_SECURE_GETENV */
+
+/*
+ * Defined if issetugid(2) is available.
+ */
+#define JEMALLOC_HAVE_ISSETUGID
+
+/* Defined if pthread_atfork(3) is available. */
+#define JEMALLOC_HAVE_PTHREAD_ATFORK
+
+/* Defined if pthread_setname_np(3) is available. */
+/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
+ */
+/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
+ */
+/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC */
+
+/*
+ * Defined if mach_absolute_time() is available.
+ */
+#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME 1
+
+/*
+ * Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
+ */
+#define JEMALLOC_HAVE_CLOCK_REALTIME 1
+
+/*
+ * Defined if _malloc_thread_cleanup() exists.  At least in the case of
+ * FreeBSD, pthread_key_create() allocates, which if used during malloc
+ * bootstrapping will cause recursion into the pthreads library.  Therefore, if
+ * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
+ * malloc_tsd.
+ */
+/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */
+
+/*
+ * Defined if threaded initialization is known to be safe on this platform.
+ * Among other things, it must be possible to initialize a mutex without
+ * triggering allocation in order for threaded allocation to be safe.
+ */
+/* #undef JEMALLOC_THREADED_INIT */
+
+/*
+ * Defined if the pthreads implementation defines
+ * _pthread_mutex_init_calloc_cb(), in which case the function is used in order
+ * to avoid recursive allocation during mutex initialization.
+ */
+/* #undef JEMALLOC_MUTEX_INIT_CB */
+
+/* Non-empty if the tls_model attribute is supported. */
+#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
+
+/*
+ * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
+ * inline functions.
+ */
+/* #undef JEMALLOC_DEBUG */
+
+/* JEMALLOC_STATS enables statistics calculation. */
+#define JEMALLOC_STATS
+
+/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
+/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
+
+/* JEMALLOC_PROF enables allocation profiling. */
+/* #undef JEMALLOC_PROF */
+
+/* Use libunwind for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBUNWIND */
+
+/* Use libgcc for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBGCC */
+
+/* Use gcc intrinsics for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_GCC */
+
+/*
+ * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
+ * segment (DSS).
+ */
+/* #undef JEMALLOC_DSS */
+
+/* Support memory filling (junk/zero). */
+#define JEMALLOC_FILL
+
+/* Support utrace(2)-based tracing. */
+/* #undef JEMALLOC_UTRACE */
+
+/* Support optional abort() on OOM. */
+/* #undef JEMALLOC_XMALLOC */
+
+/* Support lazy locking (avoid locking unless a second thread is launched). */
+/* #undef JEMALLOC_LAZY_LOCK */
+
+/*
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
+ * classes).
+ */
+/* #undef LG_QUANTUM */
+
+/* One page is 2^LG_PAGE bytes. */
+#define LG_PAGE 16
+
+/*
+ * One huge page is 2^LG_HUGEPAGE bytes.  Note that this is defined even if the
+ * system does not explicitly support huge pages; system calls that require
+ * explicit huge page support are separately configured.
+ */
+#define LG_HUGEPAGE 29
+
+/*
+ * If defined, adjacent virtual memory mappings with identical attributes
+ * automatically coalesce, and they fragment when changes are made to subranges.
+ * This is the normal order of things for mmap()/munmap(), but on Windows
+ * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
+ * mappings do *not* coalesce/fragment.
+ */
+#define JEMALLOC_MAPS_COALESCE
+
+/*
+ * If defined, retain memory for later reuse by default rather than using e.g.
+ * munmap() to unmap freed extents.  This is enabled on 64-bit Linux because
+ * common sequences of mmap()/munmap() calls will cause virtual memory map
+ * holes.
+ */
+/* #undef JEMALLOC_RETAIN */
+
+/* TLS is used to map arenas and magazine caches to threads. */
+/* #undef JEMALLOC_TLS */
+
+/*
+ * Used to mark unreachable code to quiet "end of non-void" compiler warnings.
+ * Don't use this directly; instead use unreachable() from util.h
+ */
+#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
+
+/*
+ * ffs*() functions to use for bitmapping.  Don't use these directly; instead,
+ * use ffs_*() from util.h.
+ */
+#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
+#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
+#define JEMALLOC_INTERNAL_FFS __builtin_ffs
+
+/*
+ * popcount*() functions to use for bitmapping.
+ */
+#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
+#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
+
+/*
+ * If defined, explicitly attempt to more uniformly distribute large allocation
+ * pointer alignments across all cache indices.
+ */
+#define JEMALLOC_CACHE_OBLIVIOUS
+
+/*
+ * If defined, enable logging facilities.  We make this a configure option to
+ * avoid taking extra branches everywhere.
+ */
+/* #undef JEMALLOC_LOG */
+
+/*
+ * If defined, use readlinkat() (instead of readlink()) to follow
+ * /etc/malloc_conf.
+ */
+/* #undef JEMALLOC_READLINKAT */
+
+/*
+ * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
+ */
+#define JEMALLOC_ZONE
+
+/*
+ * Methods for determining whether the OS overcommits.
+ * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
+ *                                         /proc/sys/vm.overcommit_memory file.
+ * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
+ */
+/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */
+/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */
+
+/* Defined if madvise(2) is available. */
+#define JEMALLOC_HAVE_MADVISE
+
+/*
+ * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
+ * arguments to madvise(2).
+ */
+/* #undef JEMALLOC_HAVE_MADVISE_HUGE */
+
+/*
+ * Methods for purging unused pages differ between operating systems.
+ *
+ *   madvise(..., MADV_FREE) : This marks pages as being unused, such that they
+ *                             will be discarded rather than swapped out.
+ *   madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
+ *                                 defined, this immediately discards pages,
+ *                                 such that new pages will be demand-zeroed if
+ *                                 the address region is later touched;
+ *                                 otherwise this behaves similarly to
+ *                                 MADV_FREE, though typically with higher
+ *                                 system overhead.
+ */
+#define JEMALLOC_PURGE_MADVISE_FREE
+#define JEMALLOC_PURGE_MADVISE_DONTNEED
+/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */
+
+/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
+/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
+
+/*
+ * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
+ */
+/* #undef JEMALLOC_MADVISE_DONTDUMP */
+
+/*
+ * Defined if transparent huge pages (THPs) are supported via the
+ * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
+ */
+/* #undef JEMALLOC_THP */
+
+/* Define if operating system has alloca.h header. */
+/* #undef JEMALLOC_HAS_ALLOCA_H */
+
+/* C99 restrict keyword supported. */
+#define JEMALLOC_HAS_RESTRICT 1
+
+/* For use by hash code. */
+/* #undef JEMALLOC_BIG_ENDIAN */
+
+/* sizeof(int) == 2^LG_SIZEOF_INT. */
+#define LG_SIZEOF_INT 2
+
+/* sizeof(long) == 2^LG_SIZEOF_LONG. */
+#define LG_SIZEOF_LONG 3
+
+/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
+#define LG_SIZEOF_LONG_LONG 3
+
+/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
+#define LG_SIZEOF_INTMAX_T 3
+
+/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
+/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
+
+/* glibc memalign hook. */
+/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
+
+/* pthread support */
+#define JEMALLOC_HAVE_PTHREAD
+
+/* dlsym() support */
+#define JEMALLOC_HAVE_DLSYM
+
+/* Adaptive mutex support in pthreads. */
+/* #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP */
+
+/* GNU specific sched_getcpu support */
+/* #undef JEMALLOC_HAVE_SCHED_GETCPU */
+
+/* GNU specific sched_setaffinity support */
+/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */
+
+/*
+ * If defined, all the features necessary for background threads are present.
+ */
+/* #undef JEMALLOC_BACKGROUND_THREAD */
+
+/*
+ * If defined, jemalloc symbols are not exported (doesn't work when
+ * JEMALLOC_PREFIX is not defined).
+ */
+/* #undef JEMALLOC_EXPORT */
+
+/* config.malloc_conf options string. */
+#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
+
+/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
+/* #undef JEMALLOC_IS_MALLOC */
+
+/*
+ * Defined if strerror_r returns char * if _GNU_SOURCE is defined.
+ */
+/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
+
+/* Performs additional safety checks when defined. */
+/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
+
+#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
--- a/contrib/jemalloc-cmake/include_darwin_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/contrib/jemalloc-cmake/include_darwin_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in
@ -0,0 +1,372 @@
+/* include/jemalloc/internal/jemalloc_internal_defs.h.  Generated from jemalloc_internal_defs.h.in by configure.  */
+#ifndef JEMALLOC_INTERNAL_DEFS_H_
+#define JEMALLOC_INTERNAL_DEFS_H_
+/*
+ * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
+ * public APIs to be prefixed.  This makes it possible, with some care, to use
+ * multiple allocators simultaneously.
+ */
+#define JEMALLOC_PREFIX "je_"
+#define JEMALLOC_CPREFIX "JE_"
+
+/*
+ * Define overrides for non-standard allocator-related functions if they are
+ * present on the system.
+ */
+/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */
+/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */
+/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */
+/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */
+/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */
+/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */
+/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */
+
+/*
+ * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
+ * For shared libraries, symbol visibility mechanisms prevent these symbols
+ * from being exported, but for static libraries, naming collisions are a real
+ * possibility.
+ */
+#define JEMALLOC_PRIVATE_NAMESPACE je_
+
+/*
+ * Hyper-threaded CPUs may need a special instruction inside spin loops in
+ * order to yield to another virtual CPU.
+ */
+#define CPU_SPINWAIT __asm__ volatile("pause")
+/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
+#define HAVE_CPU_SPINWAIT 1
+
+/*
+ * Number of significant bits in virtual addresses.  This may be less than the
+ * total number of bits in a pointer, e.g. on x64, for which the uppermost 16
+ * bits are the same as bit 47.
+ */
+#define LG_VADDR 48
+
+/* Defined if C11 atomics are available. */
+#define JEMALLOC_C11_ATOMICS 1
+
+/* Defined if GCC __atomic atomics are available. */
+#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
+/* and the 8-bit variant support. */
+#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
+
+/* Defined if GCC __sync atomics are available. */
+#define JEMALLOC_GCC_SYNC_ATOMICS 1
+/* and the 8-bit variant support. */
+#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
+
+/*
+ * Defined if __builtin_clz() and __builtin_clzl() are available.
+ */
+#define JEMALLOC_HAVE_BUILTIN_CLZ
+
+/*
+ * Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
+ */
+#define JEMALLOC_OS_UNFAIR_LOCK
+
+/* Defined if syscall(2) is usable. */
+/* #undef JEMALLOC_USE_SYSCALL */
+
+/*
+ * Defined if secure_getenv(3) is available.
+ */
+/* #undef JEMALLOC_HAVE_SECURE_GETENV */
+
+/*
+ * Defined if issetugid(2) is available.
+ */
+#define JEMALLOC_HAVE_ISSETUGID
+
+/* Defined if pthread_atfork(3) is available. */
+#define JEMALLOC_HAVE_PTHREAD_ATFORK
+
+/* Defined if pthread_setname_np(3) is available. */
+/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
+ */
+/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
+ */
+/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC */
+
+/*
+ * Defined if mach_absolute_time() is available.
+ */
+#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME 1
+
+/*
+ * Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
+ */
+#define JEMALLOC_HAVE_CLOCK_REALTIME 1
+
+/*
+ * Defined if _malloc_thread_cleanup() exists.  At least in the case of
+ * FreeBSD, pthread_key_create() allocates, which if used during malloc
+ * bootstrapping will cause recursion into the pthreads library.  Therefore, if
+ * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
+ * malloc_tsd.
+ */
+/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */
+
+/*
+ * Defined if threaded initialization is known to be safe on this platform.
+ * Among other things, it must be possible to initialize a mutex without
+ * triggering allocation in order for threaded allocation to be safe.
+ */
+/* #undef JEMALLOC_THREADED_INIT */
+
+/*
+ * Defined if the pthreads implementation defines
+ * _pthread_mutex_init_calloc_cb(), in which case the function is used in order
+ * to avoid recursive allocation during mutex initialization.
+ */
+/* #undef JEMALLOC_MUTEX_INIT_CB */
+
+/* Non-empty if the tls_model attribute is supported. */
+#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
+
+/*
+ * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
+ * inline functions.
+ */
+/* #undef JEMALLOC_DEBUG */
+
+/* JEMALLOC_STATS enables statistics calculation. */
+#define JEMALLOC_STATS
+
+/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
+/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
+
+/* JEMALLOC_PROF enables allocation profiling. */
+/* #undef JEMALLOC_PROF */
+
+/* Use libunwind for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBUNWIND */
+
+/* Use libgcc for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBGCC */
+
+/* Use gcc intrinsics for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_GCC */
+
+/*
+ * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
+ * segment (DSS).
+ */
+/* #undef JEMALLOC_DSS */
+
+/* Support memory filling (junk/zero). */
+#define JEMALLOC_FILL
+
+/* Support utrace(2)-based tracing. */
+/* #undef JEMALLOC_UTRACE */
+
+/* Support optional abort() on OOM. */
+/* #undef JEMALLOC_XMALLOC */
+
+/* Support lazy locking (avoid locking unless a second thread is launched). */
+/* #undef JEMALLOC_LAZY_LOCK */
+
+/*
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
+ * classes).
+ */
+/* #undef LG_QUANTUM */
+
+/* One page is 2^LG_PAGE bytes. */
+#define LG_PAGE 12
+
+/*
+ * One huge page is 2^LG_HUGEPAGE bytes.  Note that this is defined even if the
+ * system does not explicitly support huge pages; system calls that require
+ * explicit huge page support are separately configured.
+ */
+#define LG_HUGEPAGE 21
+
+/*
+ * If defined, adjacent virtual memory mappings with identical attributes
+ * automatically coalesce, and they fragment when changes are made to subranges.
+ * This is the normal order of things for mmap()/munmap(), but on Windows
+ * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
+ * mappings do *not* coalesce/fragment.
+ */
+#define JEMALLOC_MAPS_COALESCE
+
+/*
+ * If defined, retain memory for later reuse by default rather than using e.g.
+ * munmap() to unmap freed extents.  This is enabled on 64-bit Linux because
+ * common sequences of mmap()/munmap() calls will cause virtual memory map
+ * holes.
+ */
+/* #undef JEMALLOC_RETAIN */
+
+/* TLS is used to map arenas and magazine caches to threads. */
+/* #undef JEMALLOC_TLS */
+
+/*
+ * Used to mark unreachable code to quiet "end of non-void" compiler warnings.
+ * Don't use this directly; instead use unreachable() from util.h
+ */
+#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
+
+/*
+ * ffs*() functions to use for bitmapping.  Don't use these directly; instead,
+ * use ffs_*() from util.h.
+ */
+#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
+#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
+#define JEMALLOC_INTERNAL_FFS __builtin_ffs
+
+/*
+ * popcount*() functions to use for bitmapping.
+ */
+#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
+#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
+
+/*
+ * If defined, explicitly attempt to more uniformly distribute large allocation
+ * pointer alignments across all cache indices.
+ */
+#define JEMALLOC_CACHE_OBLIVIOUS
+
+/*
+ * If defined, enable logging facilities.  We make this a configure option to
+ * avoid taking extra branches everywhere.
+ */
+/* #undef JEMALLOC_LOG */
+
+/*
+ * If defined, use readlinkat() (instead of readlink()) to follow
+ * /etc/malloc_conf.
+ */
+/* #undef JEMALLOC_READLINKAT */
+
+/*
+ * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
+ */
+#define JEMALLOC_ZONE
+
+/*
+ * Methods for determining whether the OS overcommits.
+ * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
+ *                                         /proc/sys/vm.overcommit_memory file.
+ * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
+ */
+/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */
+/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */
+
+/* Defined if madvise(2) is available. */
+#define JEMALLOC_HAVE_MADVISE
+
+/*
+ * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
+ * arguments to madvise(2).
+ */
+/* #undef JEMALLOC_HAVE_MADVISE_HUGE */
+
+/*
+ * Methods for purging unused pages differ between operating systems.
+ *
+ *   madvise(..., MADV_FREE) : This marks pages as being unused, such that they
+ *                             will be discarded rather than swapped out.
+ *   madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
+ *                                 defined, this immediately discards pages,
+ *                                 such that new pages will be demand-zeroed if
+ *                                 the address region is later touched;
+ *                                 otherwise this behaves similarly to
+ *                                 MADV_FREE, though typically with higher
+ *                                 system overhead.
+ */
+#define JEMALLOC_PURGE_MADVISE_FREE
+#define JEMALLOC_PURGE_MADVISE_DONTNEED
+/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */
+
+/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
+/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
+
+/*
+ * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
+ */
+/* #undef JEMALLOC_MADVISE_DONTDUMP */
+
+/*
+ * Defined if transparent huge pages (THPs) are supported via the
+ * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
+ */
+/* #undef JEMALLOC_THP */
+
+/* Define if operating system has alloca.h header. */
+/* #undef JEMALLOC_HAS_ALLOCA_H */
+
+/* C99 restrict keyword supported. */
+#define JEMALLOC_HAS_RESTRICT 1
+
+/* For use by hash code. */
+/* #undef JEMALLOC_BIG_ENDIAN */
+
+/* sizeof(int) == 2^LG_SIZEOF_INT. */
+#define LG_SIZEOF_INT 2
+
+/* sizeof(long) == 2^LG_SIZEOF_LONG. */
+#define LG_SIZEOF_LONG 3
+
+/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
+#define LG_SIZEOF_LONG_LONG 3
+
+/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
+#define LG_SIZEOF_INTMAX_T 3
+
+/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
+/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
+
+/* glibc memalign hook. */
+/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
+
+/* pthread support */
+#define JEMALLOC_HAVE_PTHREAD
+
+/* dlsym() support */
+#define JEMALLOC_HAVE_DLSYM
+
+/* Adaptive mutex support in pthreads. */
+/* #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP */
+
+/* GNU specific sched_getcpu support */
+/* #undef JEMALLOC_HAVE_SCHED_GETCPU */
+
+/* GNU specific sched_setaffinity support */
+/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */
+
+/*
+ * If defined, all the features necessary for background threads are present.
+ */
+/* #undef JEMALLOC_BACKGROUND_THREAD */
+
+/*
+ * If defined, jemalloc symbols are not exported (doesn't work when
+ * JEMALLOC_PREFIX is not defined).
+ */
+/* #undef JEMALLOC_EXPORT */
+
+/* config.malloc_conf options string. */
+#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
+
+/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
+/* #undef JEMALLOC_IS_MALLOC */
+
+/*
+ * Defined if strerror_r returns char * if _GNU_SOURCE is defined.
+ */
+/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
+
+/* Performs additional safety checks when defined. */
+/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
+
+#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
--- a/contrib/jemalloc-cmake/include_freebsd_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/contrib/jemalloc-cmake/include_freebsd_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in
@ -0,0 +1,373 @@
+/* include/jemalloc/internal/jemalloc_internal_defs.h.  Generated from jemalloc_internal_defs.h.in by configure.  */
+#ifndef JEMALLOC_INTERNAL_DEFS_H_
+#define JEMALLOC_INTERNAL_DEFS_H_
+/*
+ * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
+ * public APIs to be prefixed.  This makes it possible, with some care, to use
+ * multiple allocators simultaneously.
+ */
+/* #undef JEMALLOC_PREFIX */
+/* #undef JEMALLOC_CPREFIX */
+
+/*
+ * Define overrides for non-standard allocator-related functions if they are
+ * present on the system.
+ */
+/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */
+/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */
+/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */
+/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */
+/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */
+/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */
+#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN
+
+/*
+ * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
+ * For shared libraries, symbol visibility mechanisms prevent these symbols
+ * from being exported, but for static libraries, naming collisions are a real
+ * possibility.
+ */
+#define JEMALLOC_PRIVATE_NAMESPACE je_
+
+/*
+ * Hyper-threaded CPUs may need a special instruction inside spin loops in
+ * order to yield to another virtual CPU.
+ */
+#define CPU_SPINWAIT
+/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
+#define HAVE_CPU_SPINWAIT 0
+
+/*
+ * Number of significant bits in virtual addresses.  This may be less than the
+ * total number of bits in a pointer, e.g. on x64, for which the uppermost 16
+ * bits are the same as bit 47.
+ */
+#define LG_VADDR 48
+
+/* Defined if C11 atomics are available. */
+#define JEMALLOC_C11_ATOMICS 1
+
+/* Defined if GCC __atomic atomics are available. */
+#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
+/* and the 8-bit variant support. */
+#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
+
+/* Defined if GCC __sync atomics are available. */
+#define JEMALLOC_GCC_SYNC_ATOMICS 1
+/* and the 8-bit variant support. */
+#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
+
+/*
+ * Defined if __builtin_clz() and __builtin_clzl() are available.
+ */
+#define JEMALLOC_HAVE_BUILTIN_CLZ
+
+/*
+ * Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
+ */
+/* #undef JEMALLOC_OS_UNFAIR_LOCK */
+
+/* Defined if syscall(2) is usable. */
+#define JEMALLOC_USE_SYSCALL
+
+/*
+ * Defined if secure_getenv(3) is available.
+ */
+/* #undef JEMALLOC_HAVE_SECURE_GETENV */
+
+/*
+ * Defined if issetugid(2) is available.
+ */
+#define JEMALLOC_HAVE_ISSETUGID
+
+/* Defined if pthread_atfork(3) is available. */
+#define JEMALLOC_HAVE_PTHREAD_ATFORK
+
+/* Defined if pthread_setname_np(3) is available. */
+// Only since 12.1-STABLE
+// #define JEMALLOC_HAVE_PTHREAD_SETNAME_NP
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
+ */
+/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
+ */
+#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1
+
+/*
+ * Defined if mach_absolute_time() is available.
+ */
+/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */
+
+/*
+ * Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
+ */
+#define JEMALLOC_HAVE_CLOCK_REALTIME 1
+
+/*
+ * Defined if _malloc_thread_cleanup() exists.  At least in the case of
+ * FreeBSD, pthread_key_create() allocates, which if used during malloc
+ * bootstrapping will cause recursion into the pthreads library.  Therefore, if
+ * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
+ * malloc_tsd.
+ */
+#define JEMALLOC_MALLOC_THREAD_CLEANUP
+
+/*
+ * Defined if threaded initialization is known to be safe on this platform.
+ * Among other things, it must be possible to initialize a mutex without
+ * triggering allocation in order for threaded allocation to be safe.
+ */
+/* #undef JEMALLOC_THREADED_INIT */
+
+/*
+ * Defined if the pthreads implementation defines
+ * _pthread_mutex_init_calloc_cb(), in which case the function is used in order
+ * to avoid recursive allocation during mutex initialization.
+ */
+#define JEMALLOC_MUTEX_INIT_CB 1
+
+/* Non-empty if the tls_model attribute is supported. */
+#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
+
+/*
+ * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
+ * inline functions.
+ */
+/* #undef JEMALLOC_DEBUG */
+
+/* JEMALLOC_STATS enables statistics calculation. */
+#define JEMALLOC_STATS
+
+/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
+/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
+
+/* JEMALLOC_PROF enables allocation profiling. */
+/* #undef JEMALLOC_PROF */
+
+/* Use libunwind for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBUNWIND */
+
+/* Use libgcc for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBGCC */
+
+/* Use gcc intrinsics for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_GCC */
+
+/*
+ * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
+ * segment (DSS).
+ */
+#define JEMALLOC_DSS
+
+/* Support memory filling (junk/zero). */
+#define JEMALLOC_FILL
+
+/* Support utrace(2)-based tracing. */
+/* #undef JEMALLOC_UTRACE */
+
+/* Support optional abort() on OOM. */
+/* #undef JEMALLOC_XMALLOC */
+
+/* Support lazy locking (avoid locking unless a second thread is launched). */
+#define JEMALLOC_LAZY_LOCK
+
+/*
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
+ * classes).
+ */
+/* #undef LG_QUANTUM */
+
+/* One page is 2^LG_PAGE bytes. */
+#define LG_PAGE 16
+
+/*
+ * One huge page is 2^LG_HUGEPAGE bytes.  Note that this is defined even if the
+ * system does not explicitly support huge pages; system calls that require
+ * explicit huge page support are separately configured.
+ */
+#define LG_HUGEPAGE 29
+
+/*
+ * If defined, adjacent virtual memory mappings with identical attributes
+ * automatically coalesce, and they fragment when changes are made to subranges.
+ * This is the normal order of things for mmap()/munmap(), but on Windows
+ * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
+ * mappings do *not* coalesce/fragment.
+ */
+#define JEMALLOC_MAPS_COALESCE
+
+/*
+ * If defined, retain memory for later reuse by default rather than using e.g.
+ * munmap() to unmap freed extents.  This is enabled on 64-bit Linux because
+ * common sequences of mmap()/munmap() calls will cause virtual memory map
+ * holes.
+ */
+/* #undef JEMALLOC_RETAIN */
+
+/* TLS is used to map arenas and magazine caches to threads. */
+#define JEMALLOC_TLS
+
+/*
+ * Used to mark unreachable code to quiet "end of non-void" compiler warnings.
+ * Don't use this directly; instead use unreachable() from util.h
+ */
+#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
+
+/*
+ * ffs*() functions to use for bitmapping.  Don't use these directly; instead,
+ * use ffs_*() from util.h.
+ */
+#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
+#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
+#define JEMALLOC_INTERNAL_FFS __builtin_ffs
+
+/*
+ * popcount*() functions to use for bitmapping.
+ */
+#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
+#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
+
+/*
+ * If defined, explicitly attempt to more uniformly distribute large allocation
+ * pointer alignments across all cache indices.
+ */
+#define JEMALLOC_CACHE_OBLIVIOUS
+
+/*
+ * If defined, enable logging facilities.  We make this a configure option to
+ * avoid taking extra branches everywhere.
+ */
+/* #undef JEMALLOC_LOG */
+
+/*
+ * If defined, use readlinkat() (instead of readlink()) to follow
+ * /etc/malloc_conf.
+ */
+/* #undef JEMALLOC_READLINKAT */
+
+/*
+ * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
+ */
+/* #undef JEMALLOC_ZONE */
+
+/*
+ * Methods for determining whether the OS overcommits.
+ * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
+ *                                         /proc/sys/vm.overcommit_memory file.
+ * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
+ */
+#define JEMALLOC_SYSCTL_VM_OVERCOMMIT
+/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */
+
+/* Defined if madvise(2) is available. */
+#define JEMALLOC_HAVE_MADVISE
+
+/*
+ * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
+ * arguments to madvise(2).
+ */
+/* #undef JEMALLOC_HAVE_MADVISE_HUGE */
+
+/*
+ * Methods for purging unused pages differ between operating systems.
+ *
+ *   madvise(..., MADV_FREE) : This marks pages as being unused, such that they
+ *                             will be discarded rather than swapped out.
+ *   madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
+ *                                 defined, this immediately discards pages,
+ *                                 such that new pages will be demand-zeroed if
+ *                                 the address region is later touched;
+ *                                 otherwise this behaves similarly to
+ *                                 MADV_FREE, though typically with higher
+ *                                 system overhead.
+ */
+#define JEMALLOC_PURGE_MADVISE_FREE
+#define JEMALLOC_PURGE_MADVISE_DONTNEED
+/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */
+
+/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
+/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
+
+/*
+ * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
+ */
+/* #undef JEMALLOC_MADVISE_DONTDUMP */
+
+/*
+ * Defined if transparent huge pages (THPs) are supported via the
+ * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
+ */
+/* #undef JEMALLOC_THP */
+
+/* Define if operating system has alloca.h header. */
+/* #undef JEMALLOC_HAS_ALLOCA_H */
+
+/* C99 restrict keyword supported. */
+#define JEMALLOC_HAS_RESTRICT 1
+
+/* For use by hash code. */
+/* #undef JEMALLOC_BIG_ENDIAN */
+
+/* sizeof(int) == 2^LG_SIZEOF_INT. */
+#define LG_SIZEOF_INT 2
+
+/* sizeof(long) == 2^LG_SIZEOF_LONG. */
+#define LG_SIZEOF_LONG 3
+
+/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
+#define LG_SIZEOF_LONG_LONG 3
+
+/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
+#define LG_SIZEOF_INTMAX_T 3
+
+/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
+/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
+
+/* glibc memalign hook. */
+/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
+
+/* pthread support */
+#define JEMALLOC_HAVE_PTHREAD
+
+/* dlsym() support */
+#define JEMALLOC_HAVE_DLSYM
+
+/* Adaptive mutex support in pthreads. */
+#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
+
+/* GNU specific sched_getcpu support */
+/* #undef JEMALLOC_HAVE_SCHED_GETCPU */
+
+/* GNU specific sched_setaffinity support */
+/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */
+
+/*
+ * If defined, all the features necessary for background threads are present.
+ */
+#define JEMALLOC_BACKGROUND_THREAD 1
+
+/*
+ * If defined, jemalloc symbols are not exported (doesn't work when
+ * JEMALLOC_PREFIX is not defined).
+ */
+/* #undef JEMALLOC_EXPORT */
+
+/* config.malloc_conf options string. */
+#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
+
+/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
+#define JEMALLOC_IS_MALLOC 1
+
+/*
+ * Defined if strerror_r returns char * if _GNU_SOURCE is defined.
+ */
+/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
+
+/* Performs additional safety checks when defined. */
+/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
+
+#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
--- a/contrib/jemalloc-cmake/include_freebsd_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/contrib/jemalloc-cmake/include_freebsd_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in
@ -0,0 +1,373 @@
+/* include/jemalloc/internal/jemalloc_internal_defs.h.  Generated from jemalloc_internal_defs.h.in by configure.  */
+#ifndef JEMALLOC_INTERNAL_DEFS_H_
+#define JEMALLOC_INTERNAL_DEFS_H_
+/*
+ * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
+ * public APIs to be prefixed.  This makes it possible, with some care, to use
+ * multiple allocators simultaneously.
+ */
+/* #undef JEMALLOC_PREFIX */
+/* #undef JEMALLOC_CPREFIX */
+
+/*
+ * Define overrides for non-standard allocator-related functions if they are
+ * present on the system.
+ */
+/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */
+/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */
+/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */
+/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */
+/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */
+/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */
+#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN
+
+/*
+ * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
+ * For shared libraries, symbol visibility mechanisms prevent these symbols
+ * from being exported, but for static libraries, naming collisions are a real
+ * possibility.
+ */
+#define JEMALLOC_PRIVATE_NAMESPACE je_
+
+/*
+ * Hyper-threaded CPUs may need a special instruction inside spin loops in
+ * order to yield to another virtual CPU.
+ */
+#define CPU_SPINWAIT __asm__ volatile("pause")
+/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
+#define HAVE_CPU_SPINWAIT 1
+
+/*
+ * Number of significant bits in virtual addresses.  This may be less than the
+ * total number of bits in a pointer, e.g. on x64, for which the uppermost 16
+ * bits are the same as bit 47.
+ */
+#define LG_VADDR 48
+
+/* Defined if C11 atomics are available. */
+#define JEMALLOC_C11_ATOMICS 1
+
+/* Defined if GCC __atomic atomics are available. */
+#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
+/* and the 8-bit variant support. */
+#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
+
+/* Defined if GCC __sync atomics are available. */
+#define JEMALLOC_GCC_SYNC_ATOMICS 1
+/* and the 8-bit variant support. */
+#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
+
+/*
+ * Defined if __builtin_clz() and __builtin_clzl() are available.
+ */
+#define JEMALLOC_HAVE_BUILTIN_CLZ
+
+/*
+ * Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
+ */
+/* #undef JEMALLOC_OS_UNFAIR_LOCK */
+
+/* Defined if syscall(2) is usable. */
+#define JEMALLOC_USE_SYSCALL
+
+/*
+ * Defined if secure_getenv(3) is available.
+ */
+/* #undef JEMALLOC_HAVE_SECURE_GETENV */
+
+/*
+ * Defined if issetugid(2) is available.
+ */
+#define JEMALLOC_HAVE_ISSETUGID
+
+/* Defined if pthread_atfork(3) is available. */
+#define JEMALLOC_HAVE_PTHREAD_ATFORK
+
+/* Defined if pthread_setname_np(3) is available. */
+// Only since 12.1-STABLE
+// #define JEMALLOC_HAVE_PTHREAD_SETNAME_NP
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
+ */
+/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
+ */
+#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1
+
+/*
+ * Defined if mach_absolute_time() is available.
+ */
+/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */
+
+/*
+ * Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
+ */
+#define JEMALLOC_HAVE_CLOCK_REALTIME 1
+
+/*
+ * Defined if _malloc_thread_cleanup() exists.  At least in the case of
+ * FreeBSD, pthread_key_create() allocates, which if used during malloc
+ * bootstrapping will cause recursion into the pthreads library.  Therefore, if
+ * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
+ * malloc_tsd.
+ */
+#define JEMALLOC_MALLOC_THREAD_CLEANUP
+
+/*
+ * Defined if threaded initialization is known to be safe on this platform.
+ * Among other things, it must be possible to initialize a mutex without
+ * triggering allocation in order for threaded allocation to be safe.
+ */
+/* #undef JEMALLOC_THREADED_INIT */
+
+/*
+ * Defined if the pthreads implementation defines
+ * _pthread_mutex_init_calloc_cb(), in which case the function is used in order
+ * to avoid recursive allocation during mutex initialization.
+ */
+#define JEMALLOC_MUTEX_INIT_CB 1
+
+/* Non-empty if the tls_model attribute is supported. */
+#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
+
+/*
+ * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
+ * inline functions.
+ */
+/* #undef JEMALLOC_DEBUG */
+
+/* JEMALLOC_STATS enables statistics calculation. */
+#define JEMALLOC_STATS
+
+/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
+/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
+
+/* JEMALLOC_PROF enables allocation profiling. */
+/* #undef JEMALLOC_PROF */
+
+/* Use libunwind for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBUNWIND */
+
+/* Use libgcc for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBGCC */
+
+/* Use gcc intrinsics for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_GCC */
+
+/*
+ * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
+ * segment (DSS).
+ */
+#define JEMALLOC_DSS
+
+/* Support memory filling (junk/zero). */
+#define JEMALLOC_FILL
+
+/* Support utrace(2)-based tracing. */
+/* #undef JEMALLOC_UTRACE */
+
+/* Support optional abort() on OOM. */
+/* #undef JEMALLOC_XMALLOC */
+
+/* Support lazy locking (avoid locking unless a second thread is launched). */
+#define JEMALLOC_LAZY_LOCK
+
+/*
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
+ * classes).
+ */
+/* #undef LG_QUANTUM */
+
+/* One page is 2^LG_PAGE bytes. */
+#define LG_PAGE 12
+
+/*
+ * One huge page is 2^LG_HUGEPAGE bytes.  Note that this is defined even if the
+ * system does not explicitly support huge pages; system calls that require
+ * explicit huge page support are separately configured.
+ */
+#define LG_HUGEPAGE 21
+
+/*
+ * If defined, adjacent virtual memory mappings with identical attributes
+ * automatically coalesce, and they fragment when changes are made to subranges.
+ * This is the normal order of things for mmap()/munmap(), but on Windows
+ * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
+ * mappings do *not* coalesce/fragment.
+ */
+#define JEMALLOC_MAPS_COALESCE
+
+/*
+ * If defined, retain memory for later reuse by default rather than using e.g.
+ * munmap() to unmap freed extents.  This is enabled on 64-bit Linux because
+ * common sequences of mmap()/munmap() calls will cause virtual memory map
+ * holes.
+ */
+/* #undef JEMALLOC_RETAIN */
+
+/* TLS is used to map arenas and magazine caches to threads. */
+#define JEMALLOC_TLS
+
+/*
+ * Used to mark unreachable code to quiet "end of non-void" compiler warnings.
+ * Don't use this directly; instead use unreachable() from util.h
+ */
+#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
+
+/*
+ * ffs*() functions to use for bitmapping.  Don't use these directly; instead,
+ * use ffs_*() from util.h.
+ */
+#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
+#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
+#define JEMALLOC_INTERNAL_FFS __builtin_ffs
+
+/*
+ * popcount*() functions to use for bitmapping.
+ */
+#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
+#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
+
+/*
+ * If defined, explicitly attempt to more uniformly distribute large allocation
+ * pointer alignments across all cache indices.
+ */
+#define JEMALLOC_CACHE_OBLIVIOUS
+
+/*
+ * If defined, enable logging facilities.  We make this a configure option to
+ * avoid taking extra branches everywhere.
+ */
+/* #undef JEMALLOC_LOG */
+
+/*
+ * If defined, use readlinkat() (instead of readlink()) to follow
+ * /etc/malloc_conf.
+ */
+/* #undef JEMALLOC_READLINKAT */
+
+/*
+ * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
+ */
+/* #undef JEMALLOC_ZONE */
+
+/*
+ * Methods for determining whether the OS overcommits.
+ * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
+ *                                         /proc/sys/vm.overcommit_memory file.
+ * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
+ */
+#define JEMALLOC_SYSCTL_VM_OVERCOMMIT
+/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */
+
+/* Defined if madvise(2) is available. */
+#define JEMALLOC_HAVE_MADVISE
+
+/*
+ * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
+ * arguments to madvise(2).
+ */
+/* #undef JEMALLOC_HAVE_MADVISE_HUGE */
+
+/*
+ * Methods for purging unused pages differ between operating systems.
+ *
+ *   madvise(..., MADV_FREE) : This marks pages as being unused, such that they
+ *                             will be discarded rather than swapped out.
+ *   madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
+ *                                 defined, this immediately discards pages,
+ *                                 such that new pages will be demand-zeroed if
+ *                                 the address region is later touched;
+ *                                 otherwise this behaves similarly to
+ *                                 MADV_FREE, though typically with higher
+ *                                 system overhead.
+ */
+#define JEMALLOC_PURGE_MADVISE_FREE
+#define JEMALLOC_PURGE_MADVISE_DONTNEED
+/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */
+
+/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
+/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
+
+/*
+ * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
+ */
+/* #undef JEMALLOC_MADVISE_DONTDUMP */
+
+/*
+ * Defined if transparent huge pages (THPs) are supported via the
+ * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
+ */
+/* #undef JEMALLOC_THP */
+
+/* Define if operating system has alloca.h header. */
+/* #undef JEMALLOC_HAS_ALLOCA_H */
+
+/* C99 restrict keyword supported. */
+#define JEMALLOC_HAS_RESTRICT 1
+
+/* For use by hash code. */
+/* #undef JEMALLOC_BIG_ENDIAN */
+
+/* sizeof(int) == 2^LG_SIZEOF_INT. */
+#define LG_SIZEOF_INT 2
+
+/* sizeof(long) == 2^LG_SIZEOF_LONG. */
+#define LG_SIZEOF_LONG 3
+
+/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
+#define LG_SIZEOF_LONG_LONG 3
+
+/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
+#define LG_SIZEOF_INTMAX_T 3
+
+/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
+/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
+
+/* glibc memalign hook. */
+/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
+
+/* pthread support */
+#define JEMALLOC_HAVE_PTHREAD
+
+/* dlsym() support */
+#define JEMALLOC_HAVE_DLSYM
+
+/* Adaptive mutex support in pthreads. */
+#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
+
+/* GNU specific sched_getcpu support */
+/* #undef JEMALLOC_HAVE_SCHED_GETCPU */
+
+/* GNU specific sched_setaffinity support */
+/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */
+
+/*
+ * If defined, all the features necessary for background threads are present.
+ */
+#define JEMALLOC_BACKGROUND_THREAD 1
+
+/*
+ * If defined, jemalloc symbols are not exported (doesn't work when
+ * JEMALLOC_PREFIX is not defined).
+ */
+/* #undef JEMALLOC_EXPORT */
+
+/* config.malloc_conf options string. */
+#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
+
+/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
+#define JEMALLOC_IS_MALLOC 1
+
+/*
+ * Defined if strerror_r returns char * if _GNU_SOURCE is defined.
+ */
+/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
+
+/* Performs additional safety checks when defined. */
+/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
+
+#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
--- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in
@ -35,7 +35,7 @@
 */
 #define CPU_SPINWAIT
 /* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
-#define HAVE_CPU_SPINWAIT 9
+#define HAVE_CPU_SPINWAIT 0

 /*
 * Number of significant bits in virtual addresses.  This may be less than the
--- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h
+++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h
@ -1,213 +0,0 @@
-#ifndef JEMALLOC_PREAMBLE_H
-#define JEMALLOC_PREAMBLE_H
-
-#include "jemalloc_internal_defs.h"
-#include "jemalloc/internal/jemalloc_internal_decls.h"
-
-#ifdef JEMALLOC_UTRACE
-#include <sys/ktrace.h>
-#endif
-
-#define JEMALLOC_NO_DEMANGLE
-#ifdef JEMALLOC_JET
-#  undef JEMALLOC_IS_MALLOC
-#  define JEMALLOC_N(n) jet_##n
-#  include "jemalloc/internal/public_namespace.h"
-#  define JEMALLOC_NO_RENAME
-#  include "jemalloc/jemalloc.h"
-#  undef JEMALLOC_NO_RENAME
-#else
-#  define JEMALLOC_N(n) je_##n
-#  include "jemalloc/jemalloc.h"
-#endif
-
-#if defined(JEMALLOC_OSATOMIC)
-#include <libkern/OSAtomic.h>
-#endif
-
-#ifdef JEMALLOC_ZONE
-#include <mach/mach_error.h>
-#include <mach/mach_init.h>
-#include <mach/vm_map.h>
-#endif
-
-#include "jemalloc/internal/jemalloc_internal_macros.h"
-
-/*
- * Note that the ordering matters here; the hook itself is name-mangled.  We
- * want the inclusion of hooks to happen early, so that we hook as much as
- * possible.
- */
-#ifndef JEMALLOC_NO_PRIVATE_NAMESPACE
-#  ifndef JEMALLOC_JET
-#    include "jemalloc/internal/private_namespace.h"
-#  else
-#    include "jemalloc/internal/private_namespace_jet.h"
-#  endif
-#endif
-#include "jemalloc/internal/test_hooks.h"
-
-#ifdef JEMALLOC_DEFINE_MADVISE_FREE
-#  define JEMALLOC_MADV_FREE 8
-#endif
-
-static const bool config_debug =
-#ifdef JEMALLOC_DEBUG
-    true
-#else
-    false
-#endif
-    ;
-static const bool have_dss =
-#ifdef JEMALLOC_DSS
-    true
-#else
-    false
-#endif
-    ;
-static const bool have_madvise_huge =
-#ifdef JEMALLOC_HAVE_MADVISE_HUGE
-    true
-#else
-    false
-#endif
-    ;
-static const bool config_fill =
-#ifdef JEMALLOC_FILL
-    true
-#else
-    false
-#endif
-    ;
-static const bool config_lazy_lock =
-#ifdef JEMALLOC_LAZY_LOCK
-    true
-#else
-    false
-#endif
-    ;
-static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF;
-static const bool config_prof =
-#ifdef JEMALLOC_PROF
-    true
-#else
-    false
-#endif
-    ;
-static const bool config_prof_libgcc =
-#ifdef JEMALLOC_PROF_LIBGCC
-    true
-#else
-    false
-#endif
-    ;
-static const bool config_prof_libunwind =
-#ifdef JEMALLOC_PROF_LIBUNWIND
-    true
-#else
-    false
-#endif
-    ;
-static const bool maps_coalesce =
-#ifdef JEMALLOC_MAPS_COALESCE
-    true
-#else
-    false
-#endif
-    ;
-static const bool config_stats =
-#ifdef JEMALLOC_STATS
-    true
-#else
-    false
-#endif
-    ;
-static const bool config_tls =
-#ifdef JEMALLOC_TLS
-    true
-#else
-    false
-#endif
-    ;
-static const bool config_utrace =
-#ifdef JEMALLOC_UTRACE
-    true
-#else
-    false
-#endif
-    ;
-static const bool config_xmalloc =
-#ifdef JEMALLOC_XMALLOC
-    true
-#else
-    false
-#endif
-    ;
-static const bool config_cache_oblivious =
-#ifdef JEMALLOC_CACHE_OBLIVIOUS
-    true
-#else
-    false
-#endif
-    ;
-/*
- * Undocumented, for jemalloc development use only at the moment.  See the note
- * in jemalloc/internal/log.h.
- */
-static const bool config_log =
-#ifdef JEMALLOC_LOG
-    true
-#else
-    false
-#endif
-    ;
-/*
- * Are extra safety checks enabled; things like checking the size of sized
- * deallocations, double-frees, etc.
- */
-static const bool config_opt_safety_checks =
-#ifdef JEMALLOC_OPT_SAFETY_CHECKS
-    true
-#elif defined(JEMALLOC_DEBUG)
-    /*
-     * This lets us only guard safety checks by one flag instead of two; fast
-     * checks can guard solely by config_opt_safety_checks and run in debug mode
-     * too.
-     */
-    true
-#else
-    false
-#endif
-    ;
-
-#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU)
-/* Currently percpu_arena depends on sched_getcpu. */
-#define JEMALLOC_PERCPU_ARENA
-#endif
-static const bool have_percpu_arena =
-#ifdef JEMALLOC_PERCPU_ARENA
-    true
-#else
-    false
-#endif
-    ;
-/*
- * Undocumented, and not recommended; the application should take full
- * responsibility for tracking provenance.
- */
-static const bool force_ivsalloc =
-#ifdef JEMALLOC_FORCE_IVSALLOC
-    true
-#else
-    false
-#endif
-    ;
-static const bool have_background_thread =
-#ifdef JEMALLOC_BACKGROUND_THREAD
-    true
-#else
-    false
-#endif
-    ;
-
-#endif /* JEMALLOC_PREAMBLE_H */
--- a/docs/en/engines/table-engines/index.md
+++ b/docs/en/engines/table-engines/index.md
@ -19,7 +19,7 @@ The table engine (type of table) determines:

 ### MergeTree {#mergetree}

-The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated\*](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, and other features not supported in other engines.
+The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated\*](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, secondary data-skipping indexes, and other features not supported in other engines.

 Engines in the family:

@ -80,4 +80,4 @@ To select data from a virtual column, you must specify its name in the `SELECT`

 If you create a table with a column that has the same name as one of the table virtual columns, the virtual column becomes inaccessible. We don’t recommend doing this. To help avoid conflicts, virtual column names are usually prefixed with an underscore.

-[Original article](https://clickhouse.tech/docs/en/operations/table_engines/) <!--hide-->
+[Original article](https://clickhouse.tech/docs/en/engines/table-engines/) <!--hide-->
--- a/docs/en/getting-started/install.md
+++ b/docs/en/getting-started/install.md
@ -94,6 +94,18 @@ For production environments, it’s recommended to use the latest `stable`-versi

 To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Those images use official `deb` packages inside.

+### From Precompiled Binaries for Non-Standard Environments {#from-binaries-non-linux}
+
+For non-Linux operating systems and for the AArch64 CPU architecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay).
+
+- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
+- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
+- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
+
+After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. To run `clickhouse server`, you have to additionally download [server](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.xml) and [users](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/users.xml) configuration files from GitHub.
+
+These builds are not recommended for use in production environments because they are less thoroughly tested, but you can do so at your own risk. They also have only a subset of ClickHouse features available.
+
 ### From Sources {#from-sources}

 To manually compile ClickHouse, follow the instructions for [Linux](../development/build.md) or [Mac OS X](../development/build-osx.md).
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -821,6 +821,10 @@ ClickHouse supports the following algorithms of choosing replicas:
 -   [First or random](#load_balancing-first_or_random)
 -   [Round robin](#load_balancing-round_robin)

+See also:
+
+-   [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors)
+
 ### Random (by Default) {#load_balancing-random}

 ``` sql
@ -1170,8 +1174,10 @@ Controls how fast errors in distributed tables are zeroed. If a replica is unava

 See also:

+-   [load\_balancing](#load_balancing-round_robin)
 -   [Table engine Distributed](../../engines/table-engines/special/distributed.md)
 -   [distributed\_replica\_error\_cap](#settings-distributed_replica_error_cap)
+-   [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors)

 ## distributed\_replica\_error\_cap {#settings-distributed_replica_error_cap}

@ -1182,8 +1188,24 @@ Error count of each replica is capped at this value, preventing a single replica

 See also:

+-   [load\_balancing](#load_balancing-round_robin)
 -   [Table engine Distributed](../../engines/table-engines/special/distributed.md)
 -   [distributed\_replica\_error\_half\_life](#settings-distributed_replica_error_half_life)
+-   [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors)
+
+## distributed\_replica\_max\_ignored\_errors {#settings-distributed_replica_max_ignored_errors}
+
+-   Type: unsigned int
+-   Default value: 0
+
+Number of errors that will be ignored while choosing replicas (according to the `load_balancing` algorithm).
+
+See also:
+
+-   [load\_balancing](#load_balancing-round_robin)
+-   [Table engine Distributed](../../engines/table-engines/special/distributed.md)
+-   [distributed\_replica\_error\_cap](#settings-distributed_replica_error_cap)
+-   [distributed\_replica\_error\_half\_life](#settings-distributed_replica_error_half_life)

 ## distributed\_directory\_monitor\_sleep\_time\_ms {#distributed_directory_monitor_sleep_time_ms}

--- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md
+++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md
@ -9,6 +9,7 @@ The following aggregate functions are supported:
 -   [`min`](../../sql-reference/aggregate-functions/reference/min.md#agg_function-min)
 -   [`max`](../../sql-reference/aggregate-functions/reference/max.md#agg_function-max)
 -   [`sum`](../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum)
+-   [`sumWithOverflow`](../../sql-reference/aggregate-functions/reference/sumwithoverflow.md#sumwithoverflowx)
 -   [`groupBitAnd`](../../sql-reference/aggregate-functions/reference/groupbitand.md#groupbitand)
 -   [`groupBitOr`](../../sql-reference/aggregate-functions/reference/groupbitor.md#groupbitor)
 -   [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor)
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@ -176,6 +176,54 @@ hasAny(array1, array2)

 `SELECT hasAll([[1, 2], [3, 4]], [[1, 2], [1, 2]])` returns `1`.

+## hasSubstr {#hassubstr}
+
+Checks whether all the elements of array2 appear in array1 in the same exact order. Therefore, the function will return 1, if and only if `array1 = prefix + array2 + suffix`.
+
+``` sql
+hasSubstr(array1, array2)
+```
+
+In other words, the function will check whether all the elements of `array2` are contained in `array1`, like
+the `hasAll` function. In addition, it will check that the elements are observed in the same order in both `array1` and `array2`.
+
+For example:
+ - `hasSubstr([1,2,3,4], [2,3])` returns `1`. However, `hasSubstr([1,2,3,4], [3,2])` will return `0`.
+ - `hasSubstr([1,2,3,4], [1,2,3])` returns `1`. However, `hasSubstr([1,2,3,4], [1,2,4])` will return `0`.
+
+**Parameters**
+
+-   `array1` – Array of any type with a set of elements.
+-   `array2` – Array of any type with a set of elements.
+
+**Return values**
+
+-   `1`, if `array1` contains `array2`.
+-   `0`, otherwise.
+
+**Peculiar properties**
+
+-   The function will return `1` if `array2` is empty.
+-   `Null` is processed as a value. In other words, `hasSubstr([1, 2, NULL, 3, 4], [2,3])` will return `0`. However, `hasSubstr([1, 2, NULL, 3, 4], [2,NULL,3])` will return `1`.
+-   The order of values in both arrays matters.
+
+**Examples**
+
+`SELECT hasSubstr([], [])` returns 1.
+
+`SELECT hasSubstr([1, Null], [Null])` returns 1.
+
+`SELECT hasSubstr([1.0, 2, 3, 4], [1, 3])` returns 0.
+
+`SELECT hasSubstr(['a', 'b'], ['a'])` returns 1.
+
+`SELECT hasSubstr(['a', 'b' , 'c'], ['a', 'b'])` returns 1.
+
+`SELECT hasSubstr(['a', 'b' , 'c'], ['a', 'c'])` returns 0.
+
+`SELECT hasSubstr([[1, 2], [3, 4], [5, 6]], [[1, 2], [3, 4]])` returns 1.
+
+
 ## indexOf(arr, x) {#indexofarr-x}

 Returns the index of the first ‘x’ element (starting from 1) if it is in the array, or 0 if it is not.
--- a/docs/ru/getting-started/install.md
+++ b/docs/ru/getting-started/install.md
@ -82,6 +82,18 @@ sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh

 Для запуска ClickHouse в Docker нужно следовать инструкции на [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Внутри образов используются официальные `deb` пакеты.

+### Из исполняемых файлов для нестандартных окружений {#from-binaries-non-linux}
+
+Для других операционных систем и архитектуры AArch64 сборки ClickHouse предоставляются в виде кросс-компилированного бинарника с последнего коммита ветки master (с задержкой в несколько часов).
+
+- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
+- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
+- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
+
+После скачивания, можно воспользоваться `clickhouse client` для подключения к серверу, или `clickhouse local` для обработки локальных данных. Для запуска `clickhouse server` необходимо скачать конфигурационные файлы [сервера](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.xml) и [пользователей](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/users.xml) с GitHub.
+
+Данные сборки не рекомендуются для использования в продакшене, так как они недостаточно тщательно протестированы. Также в них присутствуют не все возможности ClickHouse.
+
 ### Из исходного кода {#from-sources}

 Для компиляции ClickHouse вручную, используйте инструкцию для [Linux](../development/build.md) или [Mac OS X](../development/build-osx.md).
--- a/docs/ru/interfaces/http.md
+++ b/docs/ru/interfaces/http.md
@ -276,7 +276,7 @@ $ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000&
 ### Пример {#primer}

 ``` bash
-$ curl -sS "<address>?param_id=2¶m_phrase=test" -d "SELECT * FROM table WHERE int_column = {id:UInt8} and string_column = {phrase:String}"
+$ curl -sS "http://localhost:8123/?param_id=2&param_phrase=test" -d "SELECT * FROM table WHERE int_column = {id:UInt8} and string_column = {phrase:String}"
 ```

 ## Предопределенный HTTP интерфейс {#predefined_http_interface}
--- a/docs/tools/README.md
+++ b/docs/tools/README.md
@ -20,7 +20,18 @@ Usually those also have some way to preview how Markdown will look like, which a

 It’ll take some effort to go through, but the result will be very close to production documentation.

-For the first time you’ll need to install [wkhtmltopdf](https://wkhtmltopdf.org/) and set up virtualenv:
+For the first time you’ll need to:
+
+#### 1. Install [wkhtmltopdf](https://wkhtmltopdf.org/)
+
+Follow the instructions on its official website: <https://wkhtmltopdf.org/downloads.html>
+
+#### 2. Install CLI tools from npm
+
+1. `apt-get install npm` for Debian/Ubuntu or `brew install npm` on Mac OS X.
+2. `npm install -g purifycss amphtml-validator`.
+
+#### 3. Set up virtualenv

 ``` bash
 $ cd ClickHouse/docs/tools
@ -30,7 +41,9 @@ $ source venv/bin/activate
 $ pip3 install -r requirements.txt
 ```

-Then running `build.py` without args (there are some, check `build.py --help`) will generate `ClickHouse/docs/build` folder with complete static html website.
+#### 4. Run build.py
+
+When all prerequisites are installed, running `build.py` without args (there are some, check `build.py --help`) will generate `ClickHouse/docs/build` folder with complete static html website.

 The easiest way to see the result is to use `--livereload=8888` argument of build.py. Alternatively, you can manually launch a HTTP server to serve the docs, for example by running `cd ClickHouse/docs/build && python3 -m http.server 8888`. Then go to http://localhost:8888 in browser. Feel free to use any other port instead of 8888.

--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@ -123,7 +123,7 @@ private:
    };
    bool is_interactive = true;          /// Use either interactive line editing interface or batch mode.
    bool need_render_progress = true;    /// Render query execution progress.
-    bool send_logs    = false;           /// send_logs_level passed, do not use previous cursor position, to avoid overlaps with logs
+    bool has_received_logs = false;      /// We have received some logs, do not use previous cursor position, to avoid overlaps with logs
    bool echo_queries = false;           /// Print queries before execution in batch mode.
    bool ignore_error = false;           /// In case of errors, don't print error message, continue to next query. Only applicable for non-interactive mode.
    bool print_time_to_stderr = false;   /// Output execution time to stderr in batch mode.
@ -908,8 +908,6 @@ private:

            connection->forceConnected(connection_parameters.timeouts);

-            send_logs = context.getSettingsRef().send_logs_level != LogsLevel::none;
-
            ASTPtr input_function;
            if (insert && insert->select)
                insert->tryFindInputFunction(input_function);
@ -1519,6 +1517,7 @@ private:

    void onLogData(Block & block)
    {
+        has_received_logs = true;
        initLogsOutputStream();
        logs_out_stream->write(block);
        logs_out_stream->flush();
@ -1554,7 +1553,7 @@ private:
    void clearProgress()
    {
        written_progress_chars = 0;
-        if (!send_logs)
+        if (!has_received_logs)
            std::cerr << "\r" CLEAR_TO_END_OF_LINE;
    }

@ -1582,7 +1581,7 @@ private:

        const char * indicator = indicators[increment % 8];

-        if (!send_logs && written_progress_chars)
+        if (!has_received_logs && written_progress_chars)
            message << '\r';

        size_t prefix_size = message.count();
@ -1636,7 +1635,7 @@ private:

        message << CLEAR_TO_END_OF_LINE;

-        if (send_logs)
+        if (has_received_logs)
            message << '\n';

        ++increment;
--- a/src/Client/ConnectionPoolWithFailover.cpp
+++ b/src/Client/ConnectionPoolWithFailover.cpp
@ -84,7 +84,10 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts
        break;
    }

-    return Base::get(try_get_entry, get_priority);
+    UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0;
+    bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true;
+
+    return Base::get(max_ignored_errors, fallback_to_stale_replicas, try_get_entry, get_priority);
 }

 ConnectionPoolWithFailover::Status ConnectionPoolWithFailover::getStatus() const
@ -206,9 +209,12 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
        break;
    }

-    bool fallback_to_stale_replicas = settings ? bool(settings->fallback_to_stale_replicas_for_distributed_queries) : true;
+    UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0;
+    bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true;

-    return Base::getMany(min_entries, max_entries, max_tries, try_get_entry, get_priority, fallback_to_stale_replicas);
+    return Base::getMany(min_entries, max_entries, max_tries,
+        max_ignored_errors, fallback_to_stale_replicas,
+        try_get_entry, get_priority);
 }

 ConnectionPoolWithFailover::TryResult
--- a/src/Common/CurrentThread.cpp
+++ b/src/Common/CurrentThread.cpp
@ -72,6 +72,13 @@ void CurrentThread::attachInternalTextLogsQueue(const std::shared_ptr<InternalTe
    current_thread->attachInternalTextLogsQueue(logs_queue, client_logs_level);
 }

+void CurrentThread::setFatalErrorCallback(std::function<void()> callback)
+{
+    if (unlikely(!current_thread))
+        return;
+    current_thread->setFatalErrorCallback(callback);
+}
+
 std::shared_ptr<InternalTextLogsQueue> CurrentThread::getInternalTextLogsQueue()
 {
    /// NOTE: this method could be called at early server startup stage
--- a/src/Common/CurrentThread.h
+++ b/src/Common/CurrentThread.h
@ -46,6 +46,8 @@ public:
                                            LogsLevel client_logs_level);
    static std::shared_ptr<InternalTextLogsQueue> getInternalTextLogsQueue();

+    static void setFatalErrorCallback(std::function<void()> callback);
+
    /// Makes system calls to update ProfileEvents that contain info from rusage and taskstats
    static void updatePerformanceCounters();

--- a/src/Common/PoolWithFailoverBase.h
+++ b/src/Common/PoolWithFailoverBase.h
@ -100,28 +100,28 @@ public:
    /// this functor. The pools with lower result value will be tried first.
    using GetPriorityFunc = std::function<size_t(size_t index)>;

-    /// Returns a single connection.
-    Entry get(const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority = GetPriorityFunc());
-

    /// Returns at least min_entries and at most max_entries connections (at most one connection per nested pool).
    /// The method will throw if it is unable to get min_entries alive connections or
    /// if fallback_to_stale_replicas is false and it is unable to get min_entries connections to up-to-date replicas.
    std::vector<TryResult> getMany(
            size_t min_entries, size_t max_entries, size_t max_tries,
+            size_t max_ignored_errors,
+            bool fallback_to_stale_replicas,
            const TryGetEntryFunc & try_get_entry,
-            const GetPriorityFunc & get_priority = GetPriorityFunc(),
-            bool fallback_to_stale_replicas = true);
-
-    void reportError(const Entry & entry);
+            const GetPriorityFunc & get_priority = GetPriorityFunc());

 protected:
    struct PoolState;

    using PoolStates = std::vector<PoolState>;

+    /// Returns a single connection.
+    Entry get(size_t max_ignored_errors, bool fallback_to_stale_replicas,
+        const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority = GetPriorityFunc());
+
    /// This function returns a copy of pool states to avoid race conditions when modifying shared pool states.
-    PoolStates updatePoolStates();
+    PoolStates updatePoolStates(size_t max_ignored_errors);
    PoolStates getPoolStates() const;

    NestedPools nested_pools;
@ -139,9 +139,13 @@ protected:

 template <typename TNestedPool>
 typename TNestedPool::Entry
-PoolWithFailoverBase<TNestedPool>::get(const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority)
+PoolWithFailoverBase<TNestedPool>::get(size_t max_ignored_errors, bool fallback_to_stale_replicas,
+    const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority)
 {
-    std::vector<TryResult> results = getMany(1, 1, 1, try_get_entry, get_priority);
+    std::vector<TryResult> results = getMany(
+        1 /* min entries */, 1 /* max entries */, 1 /* max tries */,
+        max_ignored_errors, fallback_to_stale_replicas,
+        try_get_entry, get_priority);
    if (results.empty() || results[0].entry.isNull())
        throw DB::Exception(
                "PoolWithFailoverBase::getMany() returned less than min_entries entries.",
@ -153,12 +157,13 @@ template <typename TNestedPool>
 std::vector<typename PoolWithFailoverBase<TNestedPool>::TryResult>
 PoolWithFailoverBase<TNestedPool>::getMany(
        size_t min_entries, size_t max_entries, size_t max_tries,
+        size_t max_ignored_errors,
+        bool fallback_to_stale_replicas,
        const TryGetEntryFunc & try_get_entry,
-        const GetPriorityFunc & get_priority,
-        bool fallback_to_stale_replicas)
+        const GetPriorityFunc & get_priority)
 {
    /// Update random numbers and error counts.
-    PoolStates pool_states = updatePoolStates();
+    PoolStates pool_states = updatePoolStates(max_ignored_errors);
    if (get_priority)
    {
        for (size_t i = 0; i < pool_states.size(); ++i)
@ -295,22 +300,6 @@ PoolWithFailoverBase<TNestedPool>::getMany(
    return try_results;
 }

-template <typename TNestedPool>
-void PoolWithFailoverBase<TNestedPool>::reportError(const Entry & entry)
-{
-    for (size_t i = 0; i < nested_pools.size(); ++i)
-    {
-        if (nested_pools[i]->contains(entry))
-        {
-            std::lock_guard lock(pool_states_mutex);
-            auto & pool_state = shared_pool_states[i];
-            pool_state.error_count = std::min(max_error_cap, pool_state.error_count + 1);
-            return;
-        }
-    }
-    throw DB::Exception("Can't find pool to report error", DB::ErrorCodes::LOGICAL_ERROR);
-}
-
 template <typename TNestedPool>
 struct PoolWithFailoverBase<TNestedPool>::PoolState
 {
@ -335,7 +324,7 @@ private:

 template <typename TNestedPool>
 typename PoolWithFailoverBase<TNestedPool>::PoolStates
-PoolWithFailoverBase<TNestedPool>::updatePoolStates()
+PoolWithFailoverBase<TNestedPool>::updatePoolStates(size_t max_ignored_errors)
 {
    PoolStates result;
    result.reserve(nested_pools.size());
@ -354,14 +343,17 @@ PoolWithFailoverBase<TNestedPool>::updatePoolStates()

            if (delta >= 0)
            {
+                const UInt64 MAX_BITS = sizeof(UInt64) * CHAR_BIT;
+                size_t shift_amount = MAX_BITS;
                /// Divide error counts by 2 every decrease_error_period seconds.
-                size_t shift_amount = delta / decrease_error_period;
+                if (decrease_error_period)
+                    shift_amount = delta / decrease_error_period;
                /// Update time but don't do it more often than once a period.
                /// Else if the function is called often enough, error count will never decrease.
                if (shift_amount)
                    last_error_decrease_time = current_time;

-                if (shift_amount >= sizeof(UInt64) * CHAR_BIT)
+                if (shift_amount >= MAX_BITS)
                {
                    for (auto & state : shared_pool_states)
                        state.error_count = 0;
@ -378,6 +370,11 @@ PoolWithFailoverBase<TNestedPool>::updatePoolStates()

        result.assign(shared_pool_states.begin(), shared_pool_states.end());
    }
+
+    /// distributed_replica_max_ignored_errors: subtract with saturation at zero (unsigned `a - b` would wrap around).
+    for (auto & state : result)
+        state.error_count = state.error_count > max_ignored_errors ? state.error_count - max_ignored_errors : 0;
+
    return result;
 }

--- a/src/Common/ThreadStatus.cpp
+++ b/src/Common/ThreadStatus.cpp
@ -98,4 +98,15 @@ void ThreadStatus::attachInternalTextLogsQueue(const InternalTextLogsQueuePtr &
    thread_group->client_logs_level = client_logs_level;
 }

+void ThreadStatus::setFatalErrorCallback(std::function<void()> callback)
+{
+    fatal_error_callback = std::move(callback);
+}
+
+void ThreadStatus::onFatalError()
+{
+    if (fatal_error_callback)
+        fatal_error_callback();
+}
+
 }
--- a/src/Common/ThreadStatus.h
+++ b/src/Common/ThreadStatus.h
@ -145,6 +145,10 @@ public:
    void attachInternalTextLogsQueue(const InternalTextLogsQueuePtr & logs_queue,
                                     LogsLevel client_logs_level);

+    /// Callback that is used to trigger sending fatal error messages to client.
+    void setFatalErrorCallback(std::function<void()> callback);
+    void onFatalError();
+
    /// Sets query context for current thread and its thread group
    /// NOTE: query_context have to be alive until detachQuery() is called
    void attachQueryContext(Context & query_context);
@ -200,6 +204,9 @@ protected:
    std::unique_ptr<RUsageCounters> last_rusage;
    std::unique_ptr<TasksStatsCounters> taskstats;

+    /// Is used to send logs from logs_queue to client in case of fatal errors.
+    std::function<void()> fatal_error_callback;
+
 private:
    void setupState(const ThreadGroupStatusPtr & thread_group_);
 };
--- a/src/Core/ExternalTable.cpp
+++ b/src/Core/ExternalTable.cpp
@ -167,7 +167,7 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header,
    auto temporary_table = TemporaryTableHolder(context, ColumnsDescription{columns}, {});
    auto storage = temporary_table.getTable();
    context.addExternalTable(data->table_name, std::move(temporary_table));
-    BlockOutputStreamPtr output = storage->write(ASTPtr(), context);
+    BlockOutputStreamPtr output = storage->write(ASTPtr(), storage->getInMemoryMetadataPtr(), context);

    /// Write data
    auto sink = std::make_shared<SinkToOutputStream>(std::move(output));
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -316,7 +316,7 @@ struct Settings : public SettingsCollection<Settings>
    M(SettingBool, log_profile_events, true, "Log query performance statistics into the query_log and query_thread_log.", 0) \
    M(SettingBool, log_query_settings, true, "Log query settings into the query_log.", 0) \
    M(SettingBool, log_query_threads, true, "Log query threads into system.query_thread_log table. This setting have effect only when 'log_queries' is true.", 0) \
-    M(SettingLogsLevel, send_logs_level, LogsLevel::none, "Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'none'", 0) \
+    M(SettingLogsLevel, send_logs_level, LogsLevel::fatal, "Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
    M(SettingBool, enable_optimize_predicate_expression, 1, "If it is set to true, optimize predicates to subqueries.", 0) \
    M(SettingBool, enable_optimize_predicate_expression_to_final_subquery, 1, "Allow push predicate to final subquery.", 0) \
    \
@ -348,6 +348,7 @@ struct Settings : public SettingsCollection<Settings>
    \
    M(SettingSeconds, distributed_replica_error_half_life, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD, "Time period reduces replica error counter by 2 times.", 0) \
    M(SettingUInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up an incredible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.", 0) \
+    M(SettingUInt64, distributed_replica_max_ignored_errors, 0, "Number of errors that will be ignored while choosing replicas", 0) \
    \
    M(SettingBool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \
    M(SettingSeconds, live_view_heartbeat_interval, DEFAULT_LIVE_VIEW_HEARTBEAT_INTERVAL_SEC, "The heartbeat interval in seconds to indicate live query is alive.", 0) \
--- a/src/Core/SettingsCollection.cpp
+++ b/src/Core/SettingsCollection.cpp
@ -542,6 +542,7 @@ IMPLEMENT_SETTING_ENUM(FormatSettings::DateTimeInputFormat, DATE_TIME_INPUT_FORM

 #define LOGS_LEVEL_LIST_OF_NAMES(M) \
    M(none, "none") \
+    M(fatal, "fatal") \
    M(error, "error") \
    M(warning, "warning") \
    M(information, "information") \
--- a/src/Core/SettingsCollection.h
+++ b/src/Core/SettingsCollection.h
@ -302,6 +302,7 @@ using SettingDateTimeInputFormat = SettingEnum<FormatSettings::DateTimeInputForm
 enum class LogsLevel
 {
    none = 0,    /// Disable
+    fatal,
    error,
    warning,
    information,
--- a/src/Core/iostream_debug_helpers.cpp
+++ b/src/Core/iostream_debug_helpers.cpp
@ -49,11 +49,11 @@ std::ostream & operator<<(std::ostream & stream, const IStorage & what)
 {
    auto table_id = what.getStorageID();
    stream << "IStorage(name = " << what.getName() << ", tableName = " << table_id.table_name << ") {"
-           << what.getColumns().getAllPhysical().toString() << "}";
+           << what.getInMemoryMetadataPtr()->getColumns().getAllPhysical().toString() << "}";
    return stream;
 }

-std::ostream & operator<<(std::ostream & stream, const TableStructureReadLock &)
+std::ostream & operator<<(std::ostream & stream, const TableLockHolder &)
 {
    stream << "TableStructureReadLock()";
    return stream;
--- a/src/Core/iostream_debug_helpers.h
+++ b/src/Core/iostream_debug_helpers.h
@ -22,9 +22,6 @@ std::ostream & operator<<(std::ostream & stream, const IDataType & what);
 class IStorage;
 std::ostream & operator<<(std::ostream & stream, const IStorage & what);

-class TableStructureReadLock;
-std::ostream & operator<<(std::ostream & stream, const TableStructureReadLock & what);
-
 class IFunctionOverloadResolver;
 std::ostream & operator<<(std::ostream & stream, const IFunctionOverloadResolver & what);

--- a/src/DataStreams/CreatingSetsBlockInputStream.cpp
+++ b/src/DataStreams/CreatingSetsBlockInputStream.cpp
@ -101,7 +101,7 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery)

    BlockOutputStreamPtr table_out;
    if (subquery.table)
-        table_out = subquery.table->write({}, context);
+        table_out = subquery.table->write({}, subquery.table->getInMemoryMetadataPtr(), context);

    bool done_with_set = !subquery.set;
    bool done_with_join = !subquery.join;
--- a/src/DataStreams/IBlockInputStream.h
+++ b/src/DataStreams/IBlockInputStream.h
@ -6,7 +6,7 @@
 #include <DataStreams/SizeLimits.h>
 #include <DataStreams/ExecutionSpeedLimits.h>
 #include <IO/Progress.h>
-#include <Storages/TableStructureLockHolder.h>
+#include <Storages/TableLockHolder.h>
 #include <Common/TypePromotion.h>

 #include <atomic>
@ -109,7 +109,7 @@ public:
    size_t checkDepth(size_t max_depth) const { return checkDepthImpl(max_depth, max_depth); }

    /// Do not allow to change the table while the blocks stream and its children are alive.
-    void addTableLock(const TableStructureReadLockHolder & lock) { table_locks.push_back(lock); }
+    void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); }

    /// Get information about execution speed.
    const BlockStreamProfileInfo & getProfileInfo() const { return info; }
@ -229,7 +229,7 @@ public:
 protected:
    /// Order is important: `table_locks` must be destroyed after `children` so that tables from
    /// which child streams read are protected by the locks during the lifetime of the child streams.
-    std::vector<TableStructureReadLockHolder> table_locks;
+    std::vector<TableLockHolder> table_locks;

    BlockInputStreams children;
    std::shared_mutex children_mutex;
--- a/src/DataStreams/IBlockOutputStream.h
+++ b/src/DataStreams/IBlockOutputStream.h
@ -2,7 +2,7 @@

 #include <Core/Block.h>
 #include <DataStreams/IBlockStream_fwd.h>
-#include <Storages/TableStructureLockHolder.h>
+#include <Storages/TableLockHolder.h>

 #include <boost/noncopyable.hpp>

@ -61,10 +61,10 @@ public:

    /** Don't let to alter table while instance of stream is alive.
      */
-    void addTableLock(const TableStructureReadLockHolder & lock) { table_locks.push_back(lock); }
+    void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); }

 private:
-    std::vector<TableStructureReadLockHolder> table_locks;
+    std::vector<TableLockHolder> table_locks;
 };

 }
--- a/src/DataStreams/InputStreamFromASTInsertQuery.cpp
+++ b/src/DataStreams/InputStreamFromASTInsertQuery.cpp
@ -21,7 +21,11 @@ namespace ErrorCodes


 InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
-    const ASTPtr & ast, ReadBuffer * input_buffer_tail_part, const Block & header, const Context & context, const ASTPtr & input_function)
+    const ASTPtr & ast,
+    ReadBuffer * input_buffer_tail_part,
+    const Block & header,
+    const Context & context,
+    const ASTPtr & input_function)
 {
    const auto * ast_insert_query = ast->as<ASTInsertQuery>();

@ -59,7 +63,8 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
    if (context.getSettingsRef().input_format_defaults_for_omitted_fields && ast_insert_query->table_id && !input_function)
    {
        StoragePtr storage = DatabaseCatalog::instance().getTable(ast_insert_query->table_id, context);
-        auto column_defaults = storage->getColumns().getDefaults();
+        auto metadata_snapshot = storage->getInMemoryMetadataPtr();
+        auto column_defaults = metadata_snapshot->getColumns().getDefaults();
        if (!column_defaults.empty())
            res_stream = std::make_shared<AddingDefaultsBlockInputStream>(res_stream, column_defaults, context);
    }
--- a/src/DataStreams/InputStreamFromASTInsertQuery.h
+++ b/src/DataStreams/InputStreamFromASTInsertQuery.h
@ -11,6 +11,8 @@ namespace DB

 struct BlockIO;
 class Context;
+struct StorageInMemoryMetadata;
+using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;

 /** Prepares an input stream which produce data containing in INSERT query
  * Head of inserting data could be stored in INSERT ast directly
@ -19,7 +21,8 @@ class Context;
 class InputStreamFromASTInsertQuery : public IBlockInputStream
 {
 public:
-    InputStreamFromASTInsertQuery(const ASTPtr & ast,
+    InputStreamFromASTInsertQuery(
+        const ASTPtr & ast,
        ReadBuffer * input_buffer_tail_part,
        const Block & header,
        const Context & context,
--- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp
+++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp
@ -19,15 +19,21 @@ namespace DB

 PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
    const StoragePtr & storage_,
-    const Context & context_, const ASTPtr & query_ptr_, bool no_destination)
-    : storage(storage_), context(context_), query_ptr(query_ptr_)
+    const StorageMetadataPtr & metadata_snapshot_,
+    const Context & context_,
+    const ASTPtr & query_ptr_,
+    bool no_destination)
+    : storage(storage_)
+    , metadata_snapshot(metadata_snapshot_)
+    , context(context_)
+    , query_ptr(query_ptr_)
 {
    /** TODO This is a very important line. At any insertion into the table one of streams should own lock.
      * Although now any insertion into the table is done via PushingToViewsBlockOutputStream,
      *  but it's clear that here is not the best place for this functionality.
      */
    addTableLock(
-            storage->lockStructureForShare(true, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout));
+            storage->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout));

    /// If the "root" table deduplactes blocks, there are no need to make deduplication for children
    /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks
@ -60,6 +66,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
    for (const auto & database_table : dependencies)
    {
        auto dependent_table = DatabaseCatalog::instance().getTable(database_table, context);
+        auto dependent_metadata_snapshot = dependent_table->getInMemoryMetadataPtr();

        ASTPtr query;
        BlockOutputStreamPtr out;
@ -67,12 +74,12 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
        if (auto * materialized_view = dynamic_cast<StorageMaterializedView *>(dependent_table.get()))
        {
            addTableLock(
-                    materialized_view->lockStructureForShare(
-                            true, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout));
+                    materialized_view->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout));

            StoragePtr inner_table = materialized_view->getTargetTable();
            auto inner_table_id = inner_table->getStorageID();
-            query = materialized_view->getSelectQuery().inner_query;
+            auto inner_metadata_snapshot = inner_table->getInMemoryMetadataPtr();
+            query = dependent_metadata_snapshot->getSelectQuery().inner_query;

            std::unique_ptr<ASTInsertQuery> insert = std::make_unique<ASTInsertQuery>();
            insert->table_id = inner_table_id;
@ -83,7 +90,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(

            /// Insert only columns returned by select.
            auto list = std::make_shared<ASTExpressionList>();
-            const auto & inner_table_columns = inner_table->getColumns();
+            const auto & inner_table_columns = inner_metadata_snapshot->getColumns();
            for (auto & column : header)
                /// But skip columns which storage doesn't have.
                if (inner_table_columns.hasPhysical(column.name))
@ -97,9 +104,11 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
            out = io.out;
        }
        else if (dynamic_cast<const StorageLiveView *>(dependent_table.get()))
-            out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *insert_context, ASTPtr(), true);
+            out = std::make_shared<PushingToViewsBlockOutputStream>(
+                dependent_table, dependent_metadata_snapshot, *insert_context, ASTPtr(), true);
        else
-            out = std::make_shared<PushingToViewsBlockOutputStream>(dependent_table, *insert_context, ASTPtr());
+            out = std::make_shared<PushingToViewsBlockOutputStream>(
+                dependent_table, dependent_metadata_snapshot, *insert_context, ASTPtr());

        views.emplace_back(ViewInfo{std::move(query), database_table, std::move(out), nullptr});
    }
@ -107,7 +116,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
    /// Do not push to destination table if the flag is set
    if (!no_destination)
    {
-        output = storage->write(query_ptr, context);
+        output = storage->write(query_ptr, storage->getInMemoryMetadataPtr(), context);
        replicated_output = dynamic_cast<ReplicatedMergeTreeBlockOutputStream *>(output.get());
    }
 }
@ -118,9 +127,9 @@ Block PushingToViewsBlockOutputStream::getHeader() const
    /// If we don't write directly to the destination
    /// then expect that we're inserting with precalculated virtual columns
    if (output)
-        return storage->getSampleBlock();
+        return metadata_snapshot->getSampleBlock();
    else
-        return storage->getSampleBlockWithVirtuals();
+        return metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtuals());
 }


@ -314,7 +323,7 @@ void PushingToViewsBlockOutputStream::process(const Block & block, size_t view_n
            Context local_context = *select_context;
            local_context.addViewSource(
                StorageValues::create(
-                    storage->getStorageID(), storage->getColumns(), block, storage->getVirtuals()));
+                    storage->getStorageID(), metadata_snapshot->getColumns(), block, storage->getVirtuals()));
            select.emplace(view.query, local_context, SelectQueryOptions());
            in = std::make_shared<MaterializingBlockInputStream>(select->execute().getInputStream());

--- a/src/DataStreams/PushingToViewsBlockOutputStream.h
+++ b/src/DataStreams/PushingToViewsBlockOutputStream.h
@ -17,8 +17,12 @@ class ReplicatedMergeTreeBlockOutputStream;
 class PushingToViewsBlockOutputStream : public IBlockOutputStream
 {
 public:
-    PushingToViewsBlockOutputStream(const StoragePtr & storage_,
-        const Context & context_, const ASTPtr & query_ptr_, bool no_destination = false);
+    PushingToViewsBlockOutputStream(
+        const StoragePtr & storage_,
+        const StorageMetadataPtr & metadata_snapshot_,
+        const Context & context_,
+        const ASTPtr & query_ptr_,
+        bool no_destination = false);

    Block getHeader() const override;
    void write(const Block & block) override;
@ -29,6 +33,7 @@ public:

 private:
    StoragePtr storage;
+    StorageMetadataPtr metadata_snapshot;
    BlockOutputStreamPtr output;
    ReplicatedMergeTreeBlockOutputStream * replicated_output = nullptr;

--- a/src/DataStreams/RemoteQueryExecutor.cpp
+++ b/src/DataStreams/RemoteQueryExecutor.cpp
@ -319,18 +319,22 @@ void RemoteQueryExecutor::sendExternalTables()
            for (const auto & table : external_tables)
            {
                StoragePtr cur = table.second;
+                auto metadata_snapshot = cur->getInMemoryMetadataPtr();
                QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage(context);

                Pipes pipes;

-                pipes = cur->read(cur->getColumns().getNamesOfPhysical(), {}, context,
-                                  read_from_table_stage, DEFAULT_BLOCK_SIZE, 1);
+                pipes = cur->read(
+                    metadata_snapshot->getColumns().getNamesOfPhysical(),
+                    metadata_snapshot, {}, context,
+                    read_from_table_stage, DEFAULT_BLOCK_SIZE, 1);

                auto data = std::make_unique<ExternalTableData>();
                data->table_name = table.first;

                if (pipes.empty())
-                    data->pipe = std::make_unique<Pipe>(std::make_shared<SourceFromSingleChunk>(cur->getSampleBlock(), Chunk()));
+                    data->pipe = std::make_unique<Pipe>(
+                            std::make_shared<SourceFromSingleChunk>(metadata_snapshot->getSampleBlock(), Chunk()));
                else if (pipes.size() == 1)
                    data->pipe = std::make_unique<Pipe>(std::move(pipes.front()));
                else
--- a/src/DataStreams/TTLBlockInputStream.cpp
+++ b/src/DataStreams/TTLBlockInputStream.cpp
@ -20,10 +20,12 @@ namespace ErrorCodes
 TTLBlockInputStream::TTLBlockInputStream(
    const BlockInputStreamPtr & input_,
    const MergeTreeData & storage_,
+    const StorageMetadataPtr & metadata_snapshot_,
    const MergeTreeData::MutableDataPartPtr & data_part_,
    time_t current_time_,
    bool force_)
    : storage(storage_)
+    , metadata_snapshot(metadata_snapshot_)
    , data_part(data_part_)
    , current_time(current_time_)
    , force(force_)
@ -34,11 +36,11 @@ TTLBlockInputStream::TTLBlockInputStream(
    children.push_back(input_);
    header = children.at(0)->getHeader();

-    const auto & storage_columns = storage.getColumns();
+    const auto & storage_columns = metadata_snapshot->getColumns();
    const auto & column_defaults = storage_columns.getDefaults();

    ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
-    for (const auto & [name, _] : storage.getColumnTTLs())
+    for (const auto & [name, _] : metadata_snapshot->getColumnTTLs())
    {
        auto it = column_defaults.find(name);
        if (it != column_defaults.end())
@ -65,13 +67,12 @@ TTLBlockInputStream::TTLBlockInputStream(

    if (!default_expr_list->children.empty())
    {
-        auto syntax_result = SyntaxAnalyzer(storage.global_context).analyze(
-            default_expr_list, storage.getColumns().getAllPhysical());
+        auto syntax_result = SyntaxAnalyzer(storage.global_context).analyze(default_expr_list, metadata_snapshot->getColumns().getAllPhysical());
        defaults_expression = ExpressionAnalyzer{default_expr_list, syntax_result, storage.global_context}.getActions(true);
    }

-    auto storage_rows_ttl = storage.getRowsTTL();
-    if (storage.hasRowsTTL() && storage_rows_ttl.mode == TTLMode::GROUP_BY)
+    auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
+    if (metadata_snapshot->hasRowsTTL() && storage_rows_ttl.mode == TTLMode::GROUP_BY)
    {
        current_key_value.resize(storage_rows_ttl.group_by_keys.size());

@ -106,14 +107,15 @@ bool TTLBlockInputStream::isTTLExpired(time_t ttl) const
 Block TTLBlockInputStream::readImpl()
 {
    /// Skip all data if table ttl is expired for part
-    auto storage_rows_ttl = storage.getRowsTTL();
-    if (storage.hasRowsTTL() && !storage_rows_ttl.where_expression &&
-        storage_rows_ttl.mode != TTLMode::GROUP_BY && isTTLExpired(old_ttl_infos.table_ttl.max))
+    auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
+    if (metadata_snapshot->hasRowsTTL() && !storage_rows_ttl.where_expression && storage_rows_ttl.mode != TTLMode::GROUP_BY
+        && isTTLExpired(old_ttl_infos.table_ttl.max))
    {
        rows_removed = data_part->rows_count;
        return {};
    }

+
    Block block = children.at(0)->read();
    if (!block)
    {
@ -127,7 +129,7 @@ Block TTLBlockInputStream::readImpl()
        return block;
    }

-    if (storage.hasRowsTTL() && (force || isTTLExpired(old_ttl_infos.table_ttl.min)))
+    if (metadata_snapshot->hasRowsTTL() && (force || isTTLExpired(old_ttl_infos.table_ttl.min)))
        removeRowsWithExpiredTableTTL(block);

    removeValuesWithExpiredColumnTTL(block);
@ -153,7 +155,7 @@ void TTLBlockInputStream::readSuffixImpl()

 void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block)
 {
-    auto rows_ttl = storage.getRowsTTL();
+    auto rows_ttl = metadata_snapshot->getRowsTTL();

    rows_ttl.expression->execute(block);
    if (rows_ttl.where_expression)
@ -201,7 +203,7 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block)
        size_t rows_aggregated = 0;
        size_t current_key_start = 0;
        size_t rows_with_current_key = 0;
-        auto storage_rows_ttl = storage.getRowsTTL();
+        auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
        for (size_t i = 0; i < block.rows(); ++i)
        {
            UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
@ -278,7 +280,7 @@ void TTLBlockInputStream::finalizeAggregates(MutableColumns & result_columns)
    if (!agg_result.empty())
    {
        auto aggregated_res = aggregator->convertToBlocks(agg_result, true, 1);
-        auto storage_rows_ttl = storage.getRowsTTL();
+        auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
        for (auto & agg_block : aggregated_res)
        {
            for (const auto & it : storage_rows_ttl.set_parts)
@ -310,7 +312,7 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
    }

    std::vector<String> columns_to_remove;
-    for (const auto & [name, ttl_entry] : storage.getColumnTTLs())
+    for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs())
    {
        /// If we read not all table columns. E.g. while mutation.
        if (!block.has(name))
@ -371,7 +373,7 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
 void TTLBlockInputStream::updateMovesTTL(Block & block)
 {
    std::vector<String> columns_to_remove;
-    for (const auto & ttl_entry : storage.getMoveTTLs())
+    for (const auto & ttl_entry : metadata_snapshot->getMoveTTLs())
    {
        auto & new_ttl_info = new_ttl_infos.moves_ttl[ttl_entry.result_column];

--- a/src/DataStreams/TTLBlockInputStream.h
+++ b/src/DataStreams/TTLBlockInputStream.h
@ -16,6 +16,7 @@ public:
    TTLBlockInputStream(
        const BlockInputStreamPtr & input_,
        const MergeTreeData & storage_,
+        const StorageMetadataPtr & metadata_snapshot_,
        const MergeTreeData::MutableDataPartPtr & data_part_,
        time_t current_time,
        bool force_
@ -33,6 +34,7 @@ protected:

 private:
    const MergeTreeData & storage;
+    StorageMetadataPtr metadata_snapshot;

    /// ttl_infos and empty_columns are updating while reading
    const MergeTreeData::MutableDataPartPtr & data_part;
--- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp
+++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp
@ -30,7 +30,9 @@ namespace ErrorCodes
    extern const int LOGICAL_ERROR;
 }

-static const std::vector<String> supported_functions{"any", "anyLast", "min", "max", "sum", "groupBitAnd", "groupBitOr", "groupBitXor", "sumMap", "groupArrayArray", "groupUniqArrayArray"};
+/// Names of the aggregate functions accepted here - presumably the ones allowed as the nested
+/// function of SimpleAggregateFunction; confirm against the check that consumes this list.
+static const std::vector<String> supported_functions{"any", "anyLast", "min",
+    "max", "sum", "sumWithOverflow", "groupBitAnd", "groupBitOr", "groupBitXor",
+    "sumMap", "groupArrayArray", "groupUniqArrayArray"};


 String DataTypeCustomSimpleAggregateFunction::getName() const
--- a/src/Databases/DatabaseMySQL.cpp
+++ b/src/Databases/DatabaseMySQL.cpp
@ -139,7 +139,8 @@ static ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr
        create_table_query->table = table_id.table_name;
        create_table_query->database = table_id.database_name;

-        for (const auto & column_type_and_name : storage->getColumns().getOrdinary())
+        auto metadata_snapshot = storage->getInMemoryMetadataPtr();
+        for (const auto & column_type_and_name : metadata_snapshot->getColumns().getOrdinary())
        {
            const auto & column_declaration = std::make_shared<ASTColumnDeclaration>();
            column_declaration->name = column_type_and_name.name;
@ -361,7 +362,7 @@ void DatabaseMySQL::cleanOutdatedTables()
                ++iterator;
            else
            {
-                const auto table_lock = (*iterator)->lockAlterIntention(RWLockImpl::NO_QUERY, lock_acquire_timeout);
+                const auto table_lock = (*iterator)->lockExclusively(RWLockImpl::NO_QUERY, lock_acquire_timeout);

                (*iterator)->shutdown();
                (*iterator)->is_dropped = true;
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@ -266,7 +266,7 @@ void DatabaseOnDisk::renameTable(
    }

    auto table_data_relative_path = getTableDataPath(table_name);
-    TableStructureWriteLockHolder table_lock;
+    TableExclusiveLockHolder table_lock;
    String table_metadata_path;
    ASTPtr attach_query;
    /// DatabaseLazy::detachTable may return nullptr even if table exists, so we need tryGetTable for this case.
--- a/src/Databases/IDatabase.h
+++ b/src/Databases/IDatabase.h
@ -22,7 +22,6 @@ class Context;
 struct Settings;
 struct ConstraintsDescription;
 struct IndicesDescription;
-struct TableStructureWriteLockHolder;
 class ASTCreateQuery;
 using Dictionaries = std::vector<String>;

@ -237,7 +236,7 @@ public:
    using ASTModifier = std::function<void(IAST &)>;

    /// Change the table structure in metadata.
-    /// You must call under the TableStructureLock of the corresponding table . If engine_modifier is empty, then engine does not change.
+    /// You must call under the alter_lock of the corresponding table. If engine_modifier is empty, then engine does not change.
    virtual void alterTable(
        const Context & /*context*/,
        const StorageID & /*table_id*/,
--- a/src/Functions/FunctionJoinGet.cpp
+++ b/src/Functions/FunctionJoinGet.cpp
@ -67,8 +67,7 @@ FunctionBaseImplPtr JoinGetOverloadResolver<or_null>::build(const ColumnsWithTyp
    auto join = storage_join->getJoin();
    DataTypes data_types(arguments.size());

-    auto table_lock = storage_join->lockStructureForShare(
-            false, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
+    auto table_lock = storage_join->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
    for (size_t i = 0; i < arguments.size(); ++i)
        data_types[i] = arguments[i].type;

--- a/src/Functions/FunctionJoinGet.h
+++ b/src/Functions/FunctionJoinGet.h
@ -1,6 +1,6 @@
 #include <Functions/IFunctionImpl.h>
 #include <Storages/IStorage_fwd.h>
-#include <Storages/TableStructureLockHolder.h>
+#include <Storages/TableLockHolder.h>

 namespace DB
 {
@ -37,7 +37,7 @@ class FunctionJoinGet final : public IFunctionBaseImpl
 public:
    static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet";

-    FunctionJoinGet(TableStructureReadLockHolder table_lock_, StoragePtr storage_join_,
+    FunctionJoinGet(TableLockHolder table_lock_, StoragePtr storage_join_,
                    HashJoinPtr join_, String attr_name_,
                    DataTypes argument_types_, DataTypePtr return_type_)
        : table_lock(std::move(table_lock_))
@ -57,7 +57,7 @@ public:
    ExecutableFunctionImplPtr prepare(const Block & sample_block, const ColumnNumbers & arguments, size_t result) const override;

 private:
-    TableStructureReadLockHolder table_lock;
+    TableLockHolder table_lock;
    StoragePtr storage_join;
    HashJoinPtr join;
    const String attr_name;
--- a/src/Functions/FunctionsHashing.h
+++ b/src/Functions/FunctionsHashing.h
@ -533,7 +533,7 @@ public:

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
-        if (!isString(arguments[0]))
+        if (!isStringOrFixedString(arguments[0]))
            throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

@ -567,6 +567,22 @@ public:

            block.getByPosition(result).column = std::move(col_to);
        }
+        else if (
+            const ColumnFixedString * col_from_fix = checkAndGetColumn<ColumnFixedString>(block.getByPosition(arguments[0]).column.get()))
+        {
+            auto col_to = ColumnFixedString::create(Impl::length);
+            const typename ColumnFixedString::Chars & data = col_from_fix->getChars();
+            const auto size = col_from_fix->size();
+            auto & chars_to = col_to->getChars();
+            const auto length = col_from_fix->getN();
+            chars_to.resize(size * Impl::length);
+            for (size_t i = 0; i < size; ++i)
+            {
+                Impl::apply(
+                    reinterpret_cast<const char *>(&data[i * length]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
+            }
+            block.getByPosition(result).column = std::move(col_to);
+        }
        else
            throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
                    + " of first argument of function " + getName(),
--- a/src/Functions/GatherUtils/Algorithms.h
+++ b/src/Functions/GatherUtils/Algorithms.h
@ -6,6 +6,7 @@
 #include "Sinks.h"
 #include <Core/AccurateComparison.h>
 #include <ext/range.h>
+#include "GatherUtils.h"


 namespace DB::ErrorCodes
@ -394,11 +395,12 @@ void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, con


 /// Methods to check if first array has elements from second array, overloaded for various combinations of types.
-
-template <bool all, typename FirstSliceType, typename SecondSliceType,
+template <
+    ArraySearchType search_type,
+    typename FirstSliceType,
+    typename SecondSliceType,
          bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t)>
-bool sliceHasImpl(const FirstSliceType & first, const SecondSliceType & second,
-                  const UInt8 * first_null_map, const UInt8 * second_null_map)
+bool sliceHasImplAnyAll(const FirstSliceType & first, const SecondSliceType & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
 {
    const bool has_first_null_map = first_null_map != nullptr;
    const bool has_second_null_map = second_null_map != nullptr;
@ -418,17 +420,113 @@ bool sliceHasImpl(const FirstSliceType & first, const SecondSliceType & second,
                has = true;
        }

-        if (has && !all)
+        if (has && search_type == ArraySearchType::Any)
            return true;

-        if (!has && all)
+        if (!has && search_type == ArraySearchType::All)
            return false;
+    }
+    return search_type == ArraySearchType::All;
+}

+
+/// Builds the prefix function used by the Knuth-Morris-Pratt matching algorithm, see
+/// https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm.
+/// The i-th element of the result is the length of the longest proper prefix of `pattern`
+/// that is also a suffix of pattern[0..i]. Elements are compared with isEqualFunc(pattern, i, j).
+/// An empty pattern yields an empty result.
+template <typename SliceType, typename EqualityFunc>
+std::vector<size_t> buildKMPPrefixFunction(const SliceType & pattern, const EqualityFunc & isEqualFunc)
+{
+    /// Every element starts as zero, so result[0] needs no explicit store
+    /// (an explicit `result[0] = 0` would write out of bounds for an empty pattern).
+    std::vector<size_t> result(pattern.size);
+
+    for (size_t i = 1; i < pattern.size; ++i)
+    {
+        /// Try successively shorter borders of pattern[0..i-1] until one can be extended by pattern[i].
+        for (auto length = i; length > 0;)
+        {
+            length = result[length - 1];
+            if (isEqualFunc(pattern, i, length))
+            {
+                result[i] = length + 1;
+                break;
+            }
+        }
    }

-    return all;
+    return result;
 }

+
+/// Checks whether `second` occurs inside `first` as a contiguous run of elements in the same order,
+/// using Knuth-Morris-Pratt matching. An empty `second` is always found.
+/// A null map may be nullptr, in which case no element of that slice is treated as NULL.
+/// A NULL element matches only another NULL element; non-NULL elements are compared with isEqual.
+/// isEqualUnary compares two elements of `second` and is used to build the prefix function.
+template < typename FirstSliceType,
+           typename SecondSliceType,
+           bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t),
+           bool (*isEqualUnary)(const SecondSliceType &, size_t, size_t)>
+bool sliceHasImplSubstr(const FirstSliceType & first, const SecondSliceType & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
+{
+    if (second.size == 0)
+        return true;
+
+    /// A pattern longer than the searched slice cannot occur in it, so the prefix function is not needed.
+    if (second.size > first.size)
+        return false;
+
+    const bool has_first_null_map = first_null_map != nullptr;
+    const bool has_second_null_map = second_null_map != nullptr;
+
+    std::vector<size_t> prefix_function;
+    if (has_second_null_map)
+    {
+        /// Two pattern elements are equal if both are NULL, or both are non-NULL and compare equal.
+        prefix_function = buildKMPPrefixFunction(second,
+                [null_map = second_null_map](const SecondSliceType & pattern, size_t i, size_t j)
+                {
+                    return !!null_map[i] == !!null_map[j] && (!!null_map[i] || isEqualUnary(pattern, i, j));
+                });
+    }
+    else
+    {
+        prefix_function = buildKMPPrefixFunction(second,
+                [](const SecondSliceType & pattern, size_t i, size_t j) { return isEqualUnary(pattern, i, j); });
+    }
+
+    size_t first_cur = 0;
+    size_t second_cur = 0;
+    while (first_cur < first.size && second_cur < second.size)
+    {
+        const bool is_first_null = has_first_null_map && first_null_map[first_cur];
+        const bool is_second_null = has_second_null_map && second_null_map[second_cur];
+
+        const bool cond_both_null_match = is_first_null && is_second_null;
+        const bool cond_both_not_null = !is_first_null && !is_second_null;
+        if (cond_both_null_match || (cond_both_not_null && isEqual(first, second, first_cur, second_cur)))
+        {
+            ++first_cur;
+            ++second_cur;
+        }
+        else if (second_cur > 0)
+        {
+            /// Mismatch after a partial match: fall back to the longest border instead of restarting.
+            second_cur = prefix_function[second_cur - 1];
+        }
+        else
+        {
+            ++first_cur;
+        }
+    }
+
+    /// The whole pattern was consumed only if a full occurrence was found.
+    return second_cur == second.size;
+}
+
+
+/// Entry point for the array containment checks: the implementation is picked at compile time by `search_type`.
+/// Substr is handled by sliceHasImplSubstr, which additionally needs isEqualSecond to compare
+/// elements of `second` with each other; every other search type is handled by sliceHasImplAnyAll.
+template <
+    ArraySearchType search_type,
+    typename FirstSliceType,
+    typename SecondSliceType,
+    bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t),
+    bool (*isEqualSecond)(const SecondSliceType &, size_t, size_t)>
+bool sliceHasImpl(const FirstSliceType & first, const SecondSliceType & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
+{
+    if constexpr (search_type != ArraySearchType::Substr)
+        return sliceHasImplAnyAll<search_type, FirstSliceType, SecondSliceType, isEqual>(first, second, first_null_map, second_null_map);
+    else
+        return sliceHasImplSubstr<FirstSliceType, SecondSliceType, isEqual, isEqualSecond>(first, second, first_null_map, second_null_map);
+}
+
+
 template <typename T, typename U>
 bool sliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
                        const NumericArraySlice<U> & second [[maybe_unused]],
@ -461,65 +559,95 @@ inline ALWAYS_INLINE bool sliceEqualElements(const GenericArraySlice & first, co
    return first.elements->compareAt(first_ind + first.begin, second_ind + second.begin, *second.elements, -1) == 0;
 }

-template <bool all, typename T, typename U>
+/// Compares two elements of the same numeric slice, taken at positions first_ind and second_ind.
+/// Decimal values are converted to their NativeType before being passed to accurate::equalsOp.
+template <typename T>
+bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
+                          size_t first_ind [[maybe_unused]],
+                          size_t second_ind [[maybe_unused]])
+{
+    const auto & lhs = first.data[first_ind];
+    const auto & rhs = first.data[second_ind];
+
+    if constexpr (IsDecimalNumber<T>)
+    {
+        using Native = typename T::NativeType;
+        return accurate::equalsOp(Native(lhs), Native(rhs));
+    }
+    else
+    {
+        return accurate::equalsOp(lhs, rhs);
+    }
+}
+/// Compares two elements of the same generic slice: both indices are shifted by first.begin
+/// and first.elements is compared against itself with compareAt; equality means a result of 0.
+inline ALWAYS_INLINE bool insliceEqualElements(const GenericArraySlice & first, size_t first_ind, size_t second_ind)
+{
+    return first.elements->compareAt(first_ind + first.begin, second_ind + first.begin, *first.elements, -1) == 0;
+}
+
+/// Containment check for two numeric slices. Neither side has a null map, so nullptr is passed for both.
+/// insliceEqualElements<U> is supplied for comparing elements of `second` with each other.
+template <ArraySearchType search_type, typename T, typename U>
 bool sliceHas(const NumericArraySlice<T> & first, const NumericArraySlice<U> & second)
 {
-    auto impl = sliceHasImpl<all, NumericArraySlice<T>, NumericArraySlice<U>, sliceEqualElements<T, U>>;
+    auto impl = sliceHasImpl<search_type, NumericArraySlice<T>, NumericArraySlice<U>, sliceEqualElements<T, U>, insliceEqualElements<U>>;
    return impl(first, second, nullptr, nullptr);
 }

-template <bool all>
+/// Containment check for two generic slices without null maps.
+/// Returns false right away when the two element columns do not have the same structure.
+template <ArraySearchType search_type>
 bool sliceHas(const GenericArraySlice & first, const GenericArraySlice & second)
 {
    /// Generic arrays should have the same type in order to use column.compareAt(...)
    if (!first.elements->structureEquals(*second.elements))
        return false;

-    auto impl = sliceHasImpl<all, GenericArraySlice, GenericArraySlice, sliceEqualElements>;
+    auto impl = sliceHasImpl<search_type, GenericArraySlice, GenericArraySlice, sliceEqualElements, insliceEqualElements>;
    return impl(first, second, nullptr, nullptr);
 }

-template <bool all, typename U>
+/// Mixed pair (generic first, numeric second): always reports false, whatever the search type.
+template <ArraySearchType search_type, typename U>
 bool sliceHas(const GenericArraySlice & /*first*/, const NumericArraySlice<U> & /*second*/)
 {
    return false;
 }

-template <bool all, typename T>
+/// Mixed pair (numeric first, generic second): always reports false, whatever the search type.
+template <ArraySearchType search_type, typename T>
 bool sliceHas(const NumericArraySlice<T> & /*first*/, const GenericArraySlice & /*second*/)
 {
    return false;
 }

-template <bool all, typename FirstArraySlice, typename SecondArraySlice>
+/// Containment check where only `second` is nullable: its null map is forwarded and `first` gets nullptr.
+template <ArraySearchType search_type, typename FirstArraySlice, typename SecondArraySlice>
 bool sliceHas(const FirstArraySlice & first, NullableSlice<SecondArraySlice> & second)
 {
-    auto impl = sliceHasImpl<all, FirstArraySlice, SecondArraySlice, sliceEqualElements<FirstArraySlice, SecondArraySlice>>;
+    auto impl = sliceHasImpl<
+        search_type,
+        FirstArraySlice,
+        SecondArraySlice,
+        sliceEqualElements<FirstArraySlice, SecondArraySlice>,
+        insliceEqualElements<SecondArraySlice>>;
    return impl(first, second, nullptr, second.null_map);
 }

-template <bool all, typename FirstArraySlice, typename SecondArraySlice>
+/// Containment check where only `first` is nullable: its null map is forwarded and `second` gets nullptr.
+template <ArraySearchType search_type, typename FirstArraySlice, typename SecondArraySlice>
 bool sliceHas(const NullableSlice<FirstArraySlice> & first, SecondArraySlice & second)
 {
-    auto impl = sliceHasImpl<all, FirstArraySlice, SecondArraySlice, sliceEqualElements<FirstArraySlice, SecondArraySlice>>;
+    auto impl = sliceHasImpl<
+        search_type,
+        FirstArraySlice,
+        SecondArraySlice,
+        sliceEqualElements<FirstArraySlice, SecondArraySlice>,
+        insliceEqualElements<SecondArraySlice>>;
    return impl(first, second, first.null_map, nullptr);
 }

-template <bool all, typename FirstArraySlice, typename SecondArraySlice>
+/// Containment check where both slices are nullable: both null maps are forwarded to sliceHasImpl.
+template <ArraySearchType search_type, typename FirstArraySlice, typename SecondArraySlice>
 bool sliceHas(const NullableSlice<FirstArraySlice> & first, NullableSlice<SecondArraySlice> & second)
 {
-    auto impl = sliceHasImpl<all, FirstArraySlice, SecondArraySlice, sliceEqualElements<FirstArraySlice, SecondArraySlice>>;
+    auto impl = sliceHasImpl<
+        search_type,
+        FirstArraySlice,
+        SecondArraySlice,
+        sliceEqualElements<FirstArraySlice, SecondArraySlice>,
+        insliceEqualElements<SecondArraySlice>>;
    return impl(first, second, first.null_map, second.null_map);
 }

-template <bool all, typename FirstSource, typename SecondSource>
+template <ArraySearchType search_type, typename FirstSource, typename SecondSource>
 void NO_INLINE arrayAllAny(FirstSource && first, SecondSource && second, ColumnUInt8 & result)
 {
    auto size = result.size();
    auto & data = result.getData();
    for (auto row : ext::range(0, size))
    {
-        data[row] = static_cast<UInt8>(sliceHas<all>(first.getWhole(), second.getWhole()) ? 1 : 0);
+        data[row] = static_cast<UInt8>(sliceHas<search_type>(first.getWhole(), second.getWhole()) ? 1 : 0);
        first.next();
        second.next();
    }
--- a/src/Functions/GatherUtils/GatherUtils.h
+++ b/src/Functions/GatherUtils/GatherUtils.h
@ -30,6 +30,13 @@
 namespace DB::GatherUtils
 {

+/// Selects which containment check sliceHas performs.
+enum class ArraySearchType
+{
+    /// Corresponds to the hasAny array function
+    Any,
+    /// Corresponds to the hasAll array function
+    All,
+    /// Corresponds to the hasSubstr array function
+    Substr,
+};
+
 std::unique_ptr<IArraySource> createArraySource(const ColumnArray & col, bool is_const, size_t total_rows);
 std::unique_ptr<IValueSource> createValueSource(const IColumn & col, bool is_const, size_t total_rows);
 std::unique_ptr<IArraySink> createArraySink(ColumnArray & col, size_t column_size);
@ -45,7 +52,7 @@ void sliceFromRightConstantOffsetBounded(IArraySource & src, IArraySink & sink,
 void sliceDynamicOffsetUnbounded(IArraySource & src, IArraySink & sink, const IColumn & offset_column);
 void sliceDynamicOffsetBounded(IArraySource & src, IArraySink & sink, const IColumn & offset_column, const IColumn & length_column);

-void sliceHas(IArraySource & first, IArraySource & second, bool all, ColumnUInt8 & result);
+void sliceHas(IArraySource & first, IArraySource & second, ArraySearchType & search_type, ColumnUInt8 & result);

 void push(IArraySource & array_source, IValueSource & value_source, IArraySink & sink, bool push_front);

--- a/src/Functions/GatherUtils/has.cpp
+++ b/src/Functions/GatherUtils/has.cpp
@ -8,18 +8,28 @@ namespace DB::GatherUtils
+/// Turns the runtime search_type into the arrayAllAny instantiation with the matching
+/// compile-time ArraySearchType; each enumerator has its own case.
 struct ArrayHasSelectArraySourcePair : public ArraySourcePairSelector<ArrayHasSelectArraySourcePair>
 {
    template <typename FirstSource, typename SecondSource>
-    static void selectSourcePair(FirstSource && first, SecondSource && second, bool all, ColumnUInt8 & result)
+    static void selectSourcePair(FirstSource && first, SecondSource && second, ArraySearchType & search_type, ColumnUInt8 & result)
    {
-        if (all)
-            arrayAllAny<true>(first, second, result);
-        else
-            arrayAllAny<false>(first, second, result);
+        switch (search_type)
+        {
+            case ArraySearchType::All:
+                arrayAllAny<ArraySearchType::All>(first, second, result);
+                break;
+            case ArraySearchType::Any:
+                arrayAllAny<ArraySearchType::Any>(first, second, result);
+                break;
+            case ArraySearchType::Substr:
+                arrayAllAny<ArraySearchType::Substr>(first, second, result);
+                break;
+
+        }
    }
 };

-void sliceHas(IArraySource & first, IArraySource & second, bool all, ColumnUInt8 & result)
+
+/// Type-erased entry point: forwards both sources, the search type and the result column to
+/// ArrayHasSelectArraySourcePair::select. NOTE(review): select presumably resolves the concrete
+/// source types before calling selectSourcePair - confirm in ArraySourcePairSelector.
+void sliceHas(IArraySource & first, IArraySource & second, ArraySearchType & search_type, ColumnUInt8 & result)
 {
-    ArrayHasSelectArraySourcePair::select(first, second, all, result);
+    ArrayHasSelectArraySourcePair::select(first, second, search_type, result);
 }

 }
--- a/src/Functions/array/hasAll.cpp
+++ b/src/Functions/array/hasAll.cpp
@ -1,5 +1,6 @@
 #include "hasAllAny.h"
 #include <Functions/FunctionFactory.h>
+#include <Functions/GatherUtils/GatherUtils.h>


 namespace DB
@ -10,7 +11,7 @@ class FunctionArrayHasAll : public FunctionArrayHasAllAny
 public:
    static constexpr auto name = "hasAll";
    static FunctionPtr create(const Context &) { return std::make_shared<FunctionArrayHasAll>(); }
-    FunctionArrayHasAll() : FunctionArrayHasAllAny(true, name) {}
+    FunctionArrayHasAll() : FunctionArrayHasAllAny(GatherUtils::ArraySearchType::All, name) {}
 };

 void registerFunctionHasAll(FunctionFactory & factory)
--- a/src/Functions/array/hasAllAny.h
+++ b/src/Functions/array/hasAllAny.h
@ -27,8 +27,8 @@ namespace ErrorCodes
 class FunctionArrayHasAllAny : public IFunction
 {
 public:
-    FunctionArrayHasAllAny(bool all_, const char * name_)
-        : all(all_), name(name_) {}
+    FunctionArrayHasAllAny(GatherUtils::ArraySearchType search_type_, const char * name_)
+        : search_type(search_type_), name(name_) {}

    String getName() const override { return name; }

@ -106,7 +106,7 @@ public:

        auto result_column = ColumnUInt8::create(rows);
        auto result_column_ptr = typeid_cast<ColumnUInt8 *>(result_column.get());
-        GatherUtils::sliceHas(*sources[0], *sources[1], all, *result_column_ptr);
+        GatherUtils::sliceHas(*sources[0], *sources[1], search_type, *result_column_ptr);

        block.getByPosition(result).column = std::move(result_column);
    }
@ -114,7 +114,7 @@ public:
    bool useDefaultImplementationForConstants() const override { return true; }

 private:
-    bool all;
+    GatherUtils::ArraySearchType search_type;
    const char * name;
 };

--- a/src/Functions/array/hasAny.cpp
+++ b/src/Functions/array/hasAny.cpp
@ -1,5 +1,6 @@
 #include "hasAllAny.h"
 #include <Functions/FunctionFactory.h>
+#include <Functions/GatherUtils/GatherUtils.h>


 namespace DB
@ -10,7 +11,7 @@ class FunctionArrayHasAny : public FunctionArrayHasAllAny
 public:
    static constexpr auto name = "hasAny";
    static FunctionPtr create(const Context &) { return std::make_shared<FunctionArrayHasAny>(); }
-    FunctionArrayHasAny() : FunctionArrayHasAllAny(false, name) {}
+    FunctionArrayHasAny() : FunctionArrayHasAllAny(GatherUtils::ArraySearchType::Any, name) {}
 };

 void registerFunctionHasAny(FunctionFactory & factory)
--- a/src/Functions/array/hasSubstr.cpp
+++ b/src/Functions/array/hasSubstr.cpp
@ -0,0 +1,22 @@
+#include "hasAllAny.h"
+#include <Functions/FunctionFactory.h>
+#include <Functions/GatherUtils/GatherUtils.h>
+
+
+namespace DB
+{
+
+class FunctionArrayHasSubstr : public FunctionArrayHasAllAny
+{
+public:
+    static constexpr auto name = "hasSubstr";
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionArrayHasSubstr>(); }
+    FunctionArrayHasSubstr() : FunctionArrayHasAllAny(GatherUtils::ArraySearchType::Substr, name) {}
+};
+
+void registerFunctionHasSubstr(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionArrayHasSubstr>();
+}
+
+}
--- a/src/Functions/array/registerFunctionsArray.cpp
+++ b/src/Functions/array/registerFunctionsArray.cpp
@ -8,6 +8,7 @@ void registerFunctionArrayResize(FunctionFactory &);
 void registerFunctionHas(FunctionFactory &);
 void registerFunctionHasAll(FunctionFactory &);
 void registerFunctionHasAny(FunctionFactory &);
+void registerFunctionHasSubstr(FunctionFactory &);
 void registerFunctionIndexOf(FunctionFactory &);
 void registerFunctionCountEqual(FunctionFactory &);
 void registerFunctionArrayIntersect(FunctionFactory &);
@ -43,6 +44,7 @@ void registerFunctionsArray(FunctionFactory & factory)
    registerFunctionHas(factory);
    registerFunctionHasAll(factory);
    registerFunctionHasAny(factory);
+    registerFunctionHasSubstr(factory);
    registerFunctionIndexOf(factory);
    registerFunctionCountEqual(factory);
    registerFunctionArrayIntersect(factory);
--- a/src/Functions/hasColumnInTable.cpp
+++ b/src/Functions/hasColumnInTable.cpp
@ -114,7 +114,8 @@ void FunctionHasColumnInTable::executeImpl(Block & block, const ColumnNumbers &
    if (host_name.empty())
    {
        const StoragePtr & table = DatabaseCatalog::instance().getTable({database_name, table_name}, global_context);
-        has_column = table->getColumns().hasPhysical(column_name);
+        auto table_metadata = table->getInMemoryMetadataPtr();
+        has_column = table_metadata->getColumns().hasPhysical(column_name);
    }
    else
    {
--- a/src/Functions/ya.make
+++ b/src/Functions/ya.make
@ -92,6 +92,7 @@ SRCS(
    array/emptyArrayToSingle.cpp
    array/hasAll.cpp
    array/hasAny.cpp
+    array/hasSubstr.cpp
    array/has.cpp
    array/indexOf.cpp
    array/length.cpp
--- a/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/src/Interpreters/ExpressionAnalyzer.cpp
@ -363,7 +363,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
        const IAST & args = *func->arguments;
        const ASTPtr & left_in_operand = args.children.at(0);

-        if (storage()->mayBenefitFromIndexForIn(left_in_operand, context))
+        if (storage()->mayBenefitFromIndexForIn(left_in_operand, context, metadata_snapshot))
        {
            const ASTPtr & arg = args.children.at(1);
            if (arg->as<ASTSubquery>() || arg->as<ASTIdentifier>())
@ -1018,6 +1018,7 @@ ExpressionActionsPtr SelectQueryExpressionAnalyzer::simpleSelectActions()

 ExpressionAnalysisResult::ExpressionAnalysisResult(
        SelectQueryExpressionAnalyzer & query_analyzer,
+        const StorageMetadataPtr & metadata_snapshot,
        bool first_stage_,
        bool second_stage_,
        bool only_types,
@ -1068,14 +1069,14 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(

        if (storage && (query.sampleSize() || settings.parallel_replicas_count > 1))
        {
-            Names columns_for_sampling = storage->getColumnsRequiredForSampling();
+            Names columns_for_sampling = metadata_snapshot->getColumnsRequiredForSampling();
            additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(),
                columns_for_sampling.begin(), columns_for_sampling.end());
        }

        if (storage && query.final())
        {
-            Names columns_for_final = storage->getColumnsRequiredForFinal();
+            Names columns_for_final = metadata_snapshot->getColumnsRequiredForFinal();
            additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(),
                columns_for_final.begin(), columns_for_final.end());
        }
--- a/src/Interpreters/ExpressionAnalyzer.h
+++ b/src/Interpreters/ExpressionAnalyzer.h
@ -11,7 +11,6 @@
 #include <Storages/SelectQueryInfo.h>
 #include <Interpreters/DatabaseCatalog.h>

-
 namespace DB
 {

@ -32,6 +31,9 @@ class ASTExpressionList;
 class ASTSelectQuery;
 struct ASTTablesInSelectQueryElement;

+struct StorageInMemoryMetadata;
+using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
+
 /// Create columns in block or return false if not possible
 bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column = false);

@ -202,6 +204,7 @@ struct ExpressionAnalysisResult

    ExpressionAnalysisResult(
        SelectQueryExpressionAnalyzer & query_analyzer,
+        const StorageMetadataPtr & metadata_snapshot,
        bool first_stage,
        bool second_stage,
        bool only_types,
@ -232,11 +235,14 @@ public:
        const ASTPtr & query_,
        const SyntaxAnalyzerResultPtr & syntax_analyzer_result_,
        const Context & context_,
+        const StorageMetadataPtr & metadata_snapshot_,
        const NameSet & required_result_columns_ = {},
        bool do_global_ = false,
        const SelectQueryOptions & options_ = {})
-    :   ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, options_.subquery_depth, do_global_)
-    ,   required_result_columns(required_result_columns_), query_options(options_)
+        : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, options_.subquery_depth, do_global_)
+        , metadata_snapshot(metadata_snapshot_)
+        , required_result_columns(required_result_columns_)
+        , query_options(options_)
    {
    }

@ -260,6 +266,7 @@ public:
    void appendProjectResult(ExpressionActionsChain & chain) const;

 private:
+    StorageMetadataPtr metadata_snapshot;
    /// If non-empty, ignore all expressions not from this list.
    NameSet required_result_columns;
    SelectQueryOptions query_options;
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@ -43,6 +43,8 @@ BlockIO InterpreterAlterQuery::execute()
    context.checkAccess(getRequiredAccess());
    auto table_id = context.resolveStorageID(alter, Context::ResolveOrdinary);
    StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context);
+    auto alter_lock = table->lockForAlter(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
+    auto metadata_snapshot = table->getInMemoryMetadataPtr();

    /// Add default database to table identifiers that we can encounter in e.g. default expressions,
    /// mutation expression, etc.
@ -68,7 +70,7 @@ BlockIO InterpreterAlterQuery::execute()
        }
        else if (auto mut_command = MutationCommand::parse(command_ast))
        {
-            if (mut_command->type == MutationCommand::MATERIALIZE_TTL && !table->hasAnyTTL())
+            if (mut_command->type == MutationCommand::MATERIALIZE_TTL && !metadata_snapshot->hasAnyTTL())
                throw Exception("Cannot MATERIALIZE TTL as there is no TTL set for table "
                    + table->getStorageID().getNameForLogs(), ErrorCodes::INCORRECT_QUERY);

@ -82,16 +84,13 @@ BlockIO InterpreterAlterQuery::execute()

    if (!mutation_commands.empty())
    {
-        auto table_lock_holder = table->lockStructureForShare(
-                false /* because mutation is executed asyncronously */,
-                context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
-        MutationsInterpreter(table, mutation_commands, context, false).validate(table_lock_holder);
+        MutationsInterpreter(table, metadata_snapshot, mutation_commands, context, false).validate();
        table->mutate(mutation_commands, context);
    }

    if (!partition_commands.empty())
    {
-        table->alterPartition(query_ptr, partition_commands, context);
+        table->alterPartition(query_ptr, metadata_snapshot, partition_commands, context);
    }

    if (!live_view_commands.empty())
@ -111,13 +110,11 @@ BlockIO InterpreterAlterQuery::execute()

    if (!alter_commands.empty())
    {
-        auto table_lock_holder = table->lockAlterIntention(
-                context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
        StorageInMemoryMetadata metadata = table->getInMemoryMetadata();
        alter_commands.validate(metadata, context);
        alter_commands.prepare(metadata);
        table->checkAlterIsPossible(alter_commands, context.getSettingsRef());
-        table->alter(alter_commands, context, table_lock_holder);
+        table->alter(alter_commands, context, alter_lock);
    }

    return {};
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@ -405,7 +405,7 @@ ConstraintsDescription InterpreterCreateQuery::getConstraintsDescription(const A
 InterpreterCreateQuery::TableProperties InterpreterCreateQuery::setProperties(ASTCreateQuery & create) const
 {
    TableProperties properties;
-    TableStructureReadLockHolder as_storage_lock;
+    TableLockHolder as_storage_lock;

    if (create.columns_list)
    {
@ -428,16 +428,16 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::setProperties(AS
        StoragePtr as_storage = DatabaseCatalog::instance().getTable({as_database_name, create.as_table}, context);

        /// as_storage->getColumns() and setEngine(...) must be called under structure lock of other_table for CREATE ... AS other_table.
-        as_storage_lock = as_storage->lockStructureForShare(
-                false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
-        properties.columns = as_storage->getColumns();
+        as_storage_lock = as_storage->lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
+        auto as_storage_metadata = as_storage->getInMemoryMetadataPtr();
+        properties.columns = as_storage_metadata->getColumns();

        /// Secondary indices make sense only for MergeTree family of storage engines.
        /// We should not copy them for other storages.
        if (create.storage && endsWith(create.storage->engine->name, "MergeTree"))
-            properties.indices = as_storage->getSecondaryIndices();
+            properties.indices = as_storage_metadata->getSecondaryIndices();

-        properties.constraints = as_storage->getConstraints();
+        properties.constraints = as_storage_metadata->getConstraints();
    }
    else if (create.select)
    {
--- a/src/Interpreters/InterpreterDescribeQuery.cpp
+++ b/src/Interpreters/InterpreterDescribeQuery.cpp
@ -89,9 +89,9 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl()
            table = DatabaseCatalog::instance().getTable(table_id, context);
        }

-        auto table_lock = table->lockStructureForShare(
-                false, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
-        columns = table->getColumns();
+        auto table_lock = table->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
+        auto metadata_snapshot = table->getInMemoryMetadataPtr();
+        columns = metadata_snapshot->getColumns();
    }

    Block sample_block = getSampleBlock();
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@ -93,7 +93,7 @@ BlockIO InterpreterDropQuery::executeToTable(
        {
            context.checkAccess(table->isView() ? AccessType::DROP_VIEW : AccessType::DROP_TABLE, table_id);
            table->shutdown();
-            TableStructureWriteLockHolder table_lock;
+            TableExclusiveLockHolder table_lock;
            if (database->getEngineName() != "Atomic")
                table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
            /// Drop table from memory, don't touch data and metadata
@ -105,8 +105,9 @@ BlockIO InterpreterDropQuery::executeToTable(
            table->checkTableCanBeDropped();

            auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
+            auto metadata_snapshot = table->getInMemoryMetadataPtr();
            /// Drop table data, don't touch metadata
-            table->truncate(query_ptr, context, table_lock);
+            table->truncate(query_ptr, metadata_snapshot, context, table_lock);
        }
        else if (query.kind == ASTDropQuery::Kind::Drop)
        {
@ -115,7 +116,7 @@ BlockIO InterpreterDropQuery::executeToTable(

            table->shutdown();

-            TableStructureWriteLockHolder table_lock;
+            TableExclusiveLockHolder table_lock;
            if (database->getEngineName() != "Atomic")
                table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);

@ -187,7 +188,8 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name,
            {
                auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
                /// Drop table data, don't touch metadata
-                table->truncate(query_ptr, context, table_lock);
+                auto metadata_snapshot = table->getInMemoryMetadataPtr();
+                table->truncate(query_ptr, metadata_snapshot, context, table_lock);
            }
            else if (kind == ASTDropQuery::Kind::Drop)
            {
--- a/src/Interpreters/InterpreterInsertQuery.cpp
+++ b/src/Interpreters/InterpreterInsertQuery.cpp
@ -68,19 +68,22 @@ StoragePtr InterpreterInsertQuery::getTable(ASTInsertQuery & query)
    return DatabaseCatalog::instance().getTable(query.table_id, context);
 }

-Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table) const
+Block InterpreterInsertQuery::getSampleBlock(
+    const ASTInsertQuery & query,
+    const StoragePtr & table,
+    const StorageMetadataPtr & metadata_snapshot) const
 {
-    Block table_sample_non_materialized = table->getSampleBlockNonMaterialized();
+    Block table_sample_non_materialized = metadata_snapshot->getSampleBlockNonMaterialized();
    /// If the query does not include information about columns
    if (!query.columns)
    {
        if (no_destination)
-            return table->getSampleBlockWithVirtuals();
+            return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals());
        else
            return table_sample_non_materialized;
    }

-    Block table_sample = table->getSampleBlock();
+    Block table_sample = metadata_snapshot->getSampleBlock();
    /// Form the block based on the column names from the query
    Block res;
    for (const auto & identifier : query.columns->children)
@ -110,10 +113,10 @@ BlockIO InterpreterInsertQuery::execute()
    BlockIO res;

    StoragePtr table = getTable(query);
-    auto table_lock = table->lockStructureForShare(
-            true, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
+    auto table_lock = table->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
+    auto metadata_snapshot = table->getInMemoryMetadataPtr();

-    auto query_sample_block = getSampleBlock(query, table);
+    auto query_sample_block = getSampleBlock(query, table, metadata_snapshot);
    if (!query.table_function)
        context.checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames());

@ -221,21 +224,21 @@ BlockIO InterpreterInsertQuery::execute()
            /// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage.
            ///       Otherwise we'll get duplicates when MV reads same rows again from Kafka.
            if (table->noPushingToViews() && !no_destination)
-                out = table->write(query_ptr, context);
+                out = table->write(query_ptr, metadata_snapshot, context);
            else
-                out = std::make_shared<PushingToViewsBlockOutputStream>(table, context, query_ptr, no_destination);
+                out = std::make_shared<PushingToViewsBlockOutputStream>(table, metadata_snapshot, context, query_ptr, no_destination);

            /// Note that we wrap transforms one on top of another, so we write them in reverse of data processing order.

            /// Checking constraints. It must be done after calculation of all defaults, so we can check them on calculated columns.
-            if (const auto & constraints = table->getConstraints(); !constraints.empty())
+            if (const auto & constraints = metadata_snapshot->getConstraints(); !constraints.empty())
                out = std::make_shared<CheckConstraintsBlockOutputStream>(
-                    query.table_id, out, out->getHeader(), table->getConstraints(), context);
+                    query.table_id, out, out->getHeader(), metadata_snapshot->getConstraints(), context);

            /// Actually we don't know structure of input blocks from query/table,
            /// because some clients break insertion protocol (columns != header)
            out = std::make_shared<AddingDefaultBlockOutputStream>(
-                out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context);
+                out, query_sample_block, out->getHeader(), metadata_snapshot->getColumns().getDefaults(), context);

            /// It's important to squash blocks as early as possible (before other transforms),
            ///  because other transforms may work inefficient if block size is small.
@ -286,7 +289,7 @@ BlockIO InterpreterInsertQuery::execute()

        if (!allow_materialized)
        {
-            for (const auto & column : table->getColumns())
+            for (const auto & column : metadata_snapshot->getColumns())
                if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name))
                    throw Exception("Cannot insert column " + column.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN);
        }
--- a/src/Interpreters/InterpreterInsertQuery.h
+++ b/src/Interpreters/InterpreterInsertQuery.h
@ -4,6 +4,7 @@
 #include <DataStreams/BlockIO.h>
 #include <Interpreters/IInterpreter.h>
 #include <Parsers/ASTInsertQuery.h>
+#include <Storages/StorageInMemoryMetadata.h>

 namespace DB
 {
@ -34,7 +35,7 @@ public:

 private:
    StoragePtr getTable(ASTInsertQuery & query);
-    Block getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table) const;
+    Block getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const;

    ASTPtr query_ptr;
    const Context & context;
--- a/src/Interpreters/InterpreterOptimizeQuery.cpp
+++ b/src/Interpreters/InterpreterOptimizeQuery.cpp
@ -26,7 +26,8 @@ BlockIO InterpreterOptimizeQuery::execute()

    auto table_id = context.resolveStorageID(ast, Context::ResolveOrdinary);
    StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context);
-    table->optimize(query_ptr, ast.partition, ast.final, ast.deduplicate, context);
+    auto metadata_snapshot = table->getInMemoryMetadataPtr();
+    table->optimize(query_ptr, metadata_snapshot, ast.partition, ast.final, ast.deduplicate, context);
    return {};
 }

--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@ -130,8 +130,8 @@ String InterpreterSelectQuery::generateFilterActions(
    table_expr->children.push_back(table_expr->database_and_table_name);

    /// Using separate expression analyzer to prevent any possible alias injection
-    auto syntax_result = SyntaxAnalyzer(*context).analyzeSelect(query_ast, SyntaxAnalyzerResult({}, storage));
-    SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, *context);
+    auto syntax_result = SyntaxAnalyzer(*context).analyzeSelect(query_ast, SyntaxAnalyzerResult({}, storage, metadata_snapshot));
+    SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, *context, metadata_snapshot);
    actions = analyzer.simpleSelectActions();

    return expr_list->children.at(0)->getColumnName();
@ -166,8 +166,9 @@ InterpreterSelectQuery::InterpreterSelectQuery(
    const ASTPtr & query_ptr_,
    const Context & context_,
    const StoragePtr & storage_,
+    const StorageMetadataPtr & metadata_snapshot_,
    const SelectQueryOptions & options_)
-    : InterpreterSelectQuery(query_ptr_, context_, nullptr, std::nullopt, storage_, options_.copy().noSubquery())
+    : InterpreterSelectQuery(query_ptr_, context_, nullptr, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_)
 {}

 InterpreterSelectQuery::~InterpreterSelectQuery() = default;
@ -215,7 +216,8 @@ InterpreterSelectQuery::InterpreterSelectQuery(
    std::optional<Pipe> input_pipe_,
    const StoragePtr & storage_,
    const SelectQueryOptions & options_,
-    const Names & required_result_column_names)
+    const Names & required_result_column_names,
+    const StorageMetadataPtr & metadata_snapshot_)
    : options(options_)
    /// NOTE: the query almost always should be cloned because it will be modified during analysis.
    , query_ptr(options.modify_inplace ? query_ptr_ : query_ptr_->clone())
@ -224,6 +226,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
    , input(input_)
    , input_pipe(std::move(input_pipe_))
    , log(&Poco::Logger::get("InterpreterSelectQuery"))
+    , metadata_snapshot(metadata_snapshot_)
 {
    checkStackSize();

@ -253,13 +256,14 @@ InterpreterSelectQuery::InterpreterSelectQuery(

    if (storage)
    {
-        table_lock = storage->lockStructureForShare(
-                false, context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
+        table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
        table_id = storage->getStorageID();
+        if (metadata_snapshot == nullptr)
+            metadata_snapshot = storage->getInMemoryMetadataPtr();
    }

    if (has_input || !joined_tables.resolveTables())
-        joined_tables.makeFakeTable(storage, source_header);
+        joined_tables.makeFakeTable(storage, metadata_snapshot, source_header);

    /// Rewrite JOINs
    if (!has_input && joined_tables.tablesCount() > 1)
@ -273,7 +277,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
        {
            /// Rewritten with subquery. Free storage locks here.
            storage = {};
-            table_lock.release();
+            table_lock.reset();
            table_id = StorageID::createEmpty();
        }
    }
@ -304,11 +308,12 @@ InterpreterSelectQuery::InterpreterSelectQuery(
        /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it.
        ASTPtr view_table;
        if (view)
-            view->replaceWithSubquery(getSelectQuery(), view_table);
+            view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot);

        syntax_analyzer_result = SyntaxAnalyzer(*context).analyzeSelect(
-                query_ptr, SyntaxAnalyzerResult(source_header.getNamesAndTypesList(), storage),
-                options, joined_tables.tablesWithColumns(), required_result_column_names, table_join);
+            query_ptr,
+            SyntaxAnalyzerResult(source_header.getNamesAndTypesList(), storage, metadata_snapshot),
+            options, joined_tables.tablesWithColumns(), required_result_column_names, table_join);

        /// Save scalar sub queries's results in the query context
        if (!options.only_analyze && context->hasQueryContext())
@ -331,12 +336,12 @@ InterpreterSelectQuery::InterpreterSelectQuery(
                current_info.query = query_ptr;
                current_info.syntax_analyzer_result = syntax_analyzer_result;

-                MergeTreeWhereOptimizer{current_info, *context, *merge_tree, syntax_analyzer_result->requiredSourceColumns(), log};
+                MergeTreeWhereOptimizer{current_info, *context, *merge_tree, metadata_snapshot, syntax_analyzer_result->requiredSourceColumns(), log};
            }
        }

        query_analyzer = std::make_unique<SelectQueryExpressionAnalyzer>(
-                query_ptr, syntax_analyzer_result, *context,
+                query_ptr, syntax_analyzer_result, *context, metadata_snapshot,
                NameSet(required_result_column_names.begin(), required_result_column_names.end()),
                !options.only_analyze, options);

@ -377,14 +382,15 @@ InterpreterSelectQuery::InterpreterSelectQuery(

        if (storage)
        {
-            source_header = storage->getSampleBlockForColumns(required_columns);
+            source_header = metadata_snapshot->getSampleBlockForColumns(required_columns, storage->getVirtuals(), storage->getStorageID());

            /// Fix source_header for filter actions.
            if (row_policy_filter)
            {
                filter_info = std::make_shared<FilterInfo>();
                filter_info->column_name = generateFilterActions(filter_info->actions, row_policy_filter, required_columns);
-                source_header = storage->getSampleBlockForColumns(filter_info->actions->getRequiredColumns());
+                source_header = metadata_snapshot->getSampleBlockForColumns(
+                    filter_info->actions->getRequiredColumns(), storage->getVirtuals(), storage->getStorageID());
            }
        }

@ -497,6 +503,7 @@ Block InterpreterSelectQuery::getSampleBlockImpl()

    analysis_result = ExpressionAnalysisResult(
            *query_analyzer,
+            metadata_snapshot,
            first_stage,
            second_stage,
            options.only_analyze,
@ -1105,7 +1112,7 @@ void InterpreterSelectQuery::executeFetchColumns(

        /// Detect, if ALIAS columns are required for query execution
        auto alias_columns_required = false;
-        const ColumnsDescription & storage_columns = storage->getColumns();
+        const ColumnsDescription & storage_columns = metadata_snapshot->getColumns();
        for (const auto & column_name : required_columns)
        {
            auto column_default = storage_columns.getDefault(column_name);
@ -1197,7 +1204,7 @@ void InterpreterSelectQuery::executeFetchColumns(
                    = ext::map<NameSet>(required_columns_after_prewhere, [](const auto & it) { return it.name; });
            }

-            auto syntax_result = SyntaxAnalyzer(*context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage);
+            auto syntax_result = SyntaxAnalyzer(*context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage, metadata_snapshot);
            alias_actions = ExpressionAnalyzer(required_columns_all_expr, syntax_result, *context).getActions(true);

            /// The set of required columns could be added as a result of adding an action to calculate ALIAS.
@ -1228,7 +1235,7 @@ void InterpreterSelectQuery::executeFetchColumns(
                prewhere_info->prewhere_actions = std::move(new_actions);

                auto analyzed_result
-                    = SyntaxAnalyzer(*context).analyze(required_columns_from_prewhere_expr, storage->getColumns().getAllPhysical());
+                    = SyntaxAnalyzer(*context).analyze(required_columns_from_prewhere_expr, metadata_snapshot->getColumns().getAllPhysical());
                prewhere_info->alias_actions
                    = ExpressionAnalyzer(required_columns_from_prewhere_expr, analyzed_result, *context).getActions(true, false);

@ -1332,7 +1339,6 @@ void InterpreterSelectQuery::executeFetchColumns(
    else if (storage)
    {
        /// Table.
-
        if (max_streams == 0)
            throw Exception("Logical error: zero number of streams requested", ErrorCodes::LOGICAL_ERROR);

@ -1360,11 +1366,11 @@ void InterpreterSelectQuery::executeFetchColumns(
                    getSortDescriptionFromGroupBy(query),
                    query_info.syntax_analyzer_result);

-            query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage);
+            query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage, metadata_snapshot);
        }

        auto read_step = std::make_unique<ReadFromStorageStep>(
-                table_lock, options, storage,
+            table_lock, metadata_snapshot, options, storage,
                required_columns, query_info, context, processing_stage, max_block_size, max_streams);

        read_step->setStepDescription("Read from " + storage->getName());
--- a/src/Interpreters/InterpreterSelectQuery.h
+++ b/src/Interpreters/InterpreterSelectQuery.h
@ -10,7 +10,7 @@
 #include <Interpreters/IInterpreter.h>
 #include <Interpreters/SelectQueryOptions.h>
 #include <Storages/SelectQueryInfo.h>
-#include <Storages/TableStructureLockHolder.h>
+#include <Storages/TableLockHolder.h>
 #include <Storages/ReadInOrderOptimizer.h>
 #include <Interpreters/StorageID.h>

@ -70,6 +70,7 @@ public:
        const ASTPtr & query_ptr_,
        const Context & context_,
        const StoragePtr & storage_,
+        const StorageMetadataPtr & metadata_snapshot_ = nullptr,
        const SelectQueryOptions & = {});

    ~InterpreterSelectQuery() override;
@ -101,7 +102,8 @@ private:
        std::optional<Pipe> input_pipe,
        const StoragePtr & storage_,
        const SelectQueryOptions &,
-        const Names & required_result_column_names = {});
+        const Names & required_result_column_names = {},
+        const StorageMetadataPtr & metadata_snapshot_= nullptr);

    ASTSelectQuery & getSelectQuery() { return query_ptr->as<ASTSelectQuery &>(); }

@ -186,13 +188,14 @@ private:
    /// Table from where to read data, if not subquery.
    StoragePtr storage;
    StorageID table_id = StorageID::createEmpty();  /// Will be initialized if storage is not nullptr
-    TableStructureReadLockHolder table_lock;
+    TableLockHolder table_lock;

    /// Used when we read from prepared input, not table or subquery.
    BlockInputStreamPtr input;
    std::optional<Pipe> input_pipe;

    Poco::Logger * log;
+    StorageMetadataPtr metadata_snapshot;
 };

 }
--- a/src/Interpreters/InterpreterWatchQuery.cpp
+++ b/src/Interpreters/InterpreterWatchQuery.cpp
@ -47,7 +47,7 @@ BlockIO InterpreterWatchQuery::execute()
        ErrorCodes::UNKNOWN_TABLE);

    /// List of columns to read to execute the query.
-    Names required_columns = storage->getColumns().getNamesOfPhysical();
+    Names required_columns = storage->getInMemoryMetadataPtr()->getColumns().getNamesOfPhysical();
    context.checkAccess(AccessType::SELECT, table_id, required_columns);

    /// Get context settings for this query
--- a/src/Interpreters/JoinedTables.cpp
+++ b/src/Interpreters/JoinedTables.cpp
@ -207,11 +207,11 @@ bool JoinedTables::resolveTables()
    return !tables_with_columns.empty();
 }

-void JoinedTables::makeFakeTable(StoragePtr storage, const Block & source_header)
+void JoinedTables::makeFakeTable(StoragePtr storage, const StorageMetadataPtr & metadata_snapshot, const Block & source_header)
 {
    if (storage)
    {
-        const ColumnsDescription & storage_columns = storage->getColumns();
+        const ColumnsDescription & storage_columns = metadata_snapshot->getColumns();
        tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, storage_columns.getOrdinary());

        auto & table = tables_with_columns.back();
--- a/src/Interpreters/JoinedTables.h
+++ b/src/Interpreters/JoinedTables.h
@ -13,6 +13,8 @@ namespace DB
 class ASTSelectQuery;
 class TableJoin;
 struct SelectQueryOptions;
+struct StorageInMemoryMetadata;
+using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;

 /// Joined tables' columns resolver.
 /// We want to get each table structure at most once per table occurrence. Or even better once per table.
@ -31,7 +33,7 @@ public:
    bool resolveTables();

    /// Make fake tables_with_columns[0] in case we have predefined input in InterpreterSelectQuery
-    void makeFakeTable(StoragePtr storage, const Block & source_header);
+    void makeFakeTable(StoragePtr storage, const StorageMetadataPtr & metadata_snapshot, const Block & source_header);
    std::shared_ptr<TableJoin> makeTableJoin(const ASTSelectQuery & select_query);

    const TablesWithColumns & tablesWithColumns() const { return tables_with_columns; }
--- a/src/Interpreters/MutationsInterpreter.cpp
+++ b/src/Interpreters/MutationsInterpreter.cpp
@ -137,13 +137,13 @@ ASTPtr prepareQueryAffectedAST(const std::vector<MutationCommand> & commands)
    return select;
 }

-ColumnDependencies getAllColumnDependencies(const StoragePtr & storage, const NameSet & updated_columns)
+ColumnDependencies getAllColumnDependencies(const StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns)
 {
    NameSet new_updated_columns = updated_columns;
    ColumnDependencies dependencies;
    while (!new_updated_columns.empty())
    {
-        auto new_dependencies = storage->getColumnDependencies(new_updated_columns);
+        auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns);
        new_updated_columns.clear();
        for (const auto & dependency : new_dependencies)
        {
@ -163,6 +163,7 @@ ColumnDependencies getAllColumnDependencies(const StoragePtr & storage, const Na

 bool isStorageTouchedByMutations(
    StoragePtr storage,
+    const StorageMetadataPtr & metadata_snapshot,
    const std::vector<MutationCommand> & commands,
    Context context_copy)
 {
@ -183,7 +184,7 @@ bool isStorageTouchedByMutations(
    /// Interpreter must be alive, when we use result of execute() method.
    /// For some reason it may copy context and give it into ExpressionBlockInputStream
    /// after that we will use context from destroyed stack frame in our stream.
-    InterpreterSelectQuery interpreter(select_query, context_copy, storage, SelectQueryOptions().ignoreLimits());
+    InterpreterSelectQuery interpreter(select_query, context_copy, storage, metadata_snapshot, SelectQueryOptions().ignoreLimits());
    BlockInputStreamPtr in = interpreter.execute().getInputStream();

    Block block = in->read();
@ -200,20 +201,22 @@ bool isStorageTouchedByMutations(

 MutationsInterpreter::MutationsInterpreter(
    StoragePtr storage_,
+    const StorageMetadataPtr & metadata_snapshot_,
    MutationCommands commands_,
    const Context & context_,
    bool can_execute_)
    : storage(std::move(storage_))
+    , metadata_snapshot(metadata_snapshot_)
    , commands(std::move(commands_))
    , context(context_)
    , can_execute(can_execute_)
 {
    mutation_ast = prepare(!can_execute);
    SelectQueryOptions limits = SelectQueryOptions().analyze(!can_execute).ignoreLimits();
-    select_interpreter = std::make_unique<InterpreterSelectQuery>(mutation_ast, context, storage, limits);
+    select_interpreter = std::make_unique<InterpreterSelectQuery>(mutation_ast, context, storage, metadata_snapshot_, limits);
 }

-static NameSet getKeyColumns(const StoragePtr & storage)
+static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot)
 {
    const MergeTreeData * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get());
    if (!merge_tree_data)
@ -221,10 +224,10 @@ static NameSet getKeyColumns(const StoragePtr & storage)

    NameSet key_columns;

-    for (const String & col : merge_tree_data->getColumnsRequiredForPartitionKey())
+    for (const String & col : metadata_snapshot->getColumnsRequiredForPartitionKey())
        key_columns.insert(col);

-    for (const String & col : merge_tree_data->getColumnsRequiredForSortingKey())
+    for (const String & col : metadata_snapshot->getColumnsRequiredForSortingKey())
        key_columns.insert(col);
    /// We don't process sample_by_ast separately because it must be among the primary key columns.

@ -238,15 +241,16 @@ static NameSet getKeyColumns(const StoragePtr & storage)
 }

 static void validateUpdateColumns(
-    const StoragePtr & storage, const NameSet & updated_columns,
+    const StoragePtr & storage,
+    const StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns,
    const std::unordered_map<String, Names> & column_to_affected_materialized)
 {
-    NameSet key_columns = getKeyColumns(storage);
+    NameSet key_columns = getKeyColumns(storage, metadata_snapshot);

    for (const String & column_name : updated_columns)
    {
        auto found = false;
-        for (const auto & col : storage->getColumns().getOrdinary())
+        for (const auto & col : metadata_snapshot->getColumns().getOrdinary())
        {
            if (col.name == column_name)
            {
@ -257,7 +261,7 @@ static void validateUpdateColumns(

        if (!found)
        {
-            for (const auto & col : storage->getColumns().getMaterialized())
+            for (const auto & col : metadata_snapshot->getColumns().getMaterialized())
            {
                if (col.name == column_name)
                    throw Exception("Cannot UPDATE materialized column " + backQuote(column_name), ErrorCodes::CANNOT_UPDATE_COLUMN);
@ -293,8 +297,8 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
        throw Exception("Empty mutation commands list", ErrorCodes::LOGICAL_ERROR);


-    const ColumnsDescription & columns_desc = storage->getColumns();
-    const IndicesDescription & indices_desc = storage->getSecondaryIndices();
+    const ColumnsDescription & columns_desc = metadata_snapshot->getColumns();
+    const IndicesDescription & indices_desc = metadata_snapshot->getSecondaryIndices();
    NamesAndTypesList all_columns = columns_desc.getAllPhysical();

    NameSet updated_columns;
@ -325,11 +329,11 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
            }
        }

-        validateUpdateColumns(storage, updated_columns, column_to_affected_materialized);
+        validateUpdateColumns(storage, metadata_snapshot, updated_columns, column_to_affected_materialized);
    }

    /// Columns, that we need to read for calculation of skip indices or TTL expressions.
-    auto dependencies = getAllColumnDependencies(storage, updated_columns);
+    auto dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns);

    /// First, break a sequence of commands into stages.
    for (const auto & command : commands)
@ -407,7 +411,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
        }
        else if (command.type == MutationCommand::MATERIALIZE_TTL)
        {
-            if (storage->hasRowsTTL())
+            if (metadata_snapshot->hasRowsTTL())
            {
                for (const auto & column : all_columns)
                    dependencies.emplace(column.name, ColumnDependency::TTL_TARGET);
@ -415,7 +419,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
            else
            {
                NameSet new_updated_columns;
-                auto column_ttls = storage->getColumns().getColumnTTLs();
+                auto column_ttls = metadata_snapshot->getColumns().getColumnTTLs();
                for (const auto & elem : column_ttls)
                {
                    dependencies.emplace(elem.first, ColumnDependency::TTL_TARGET);
@ -423,7 +427,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
                }

                auto all_columns_vec = all_columns.getNames();
-                auto all_dependencies = getAllColumnDependencies(storage, NameSet(all_columns_vec.begin(), all_columns_vec.end()));
+                auto all_dependencies = getAllColumnDependencies(metadata_snapshot, NameSet(all_columns_vec.begin(), all_columns_vec.end()));

                for (const auto & dependency : all_dependencies)
                {
@ -432,7 +436,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
                }

                /// Recalc only skip indices of columns, that could be updated by TTL.
-                auto new_dependencies = storage->getColumnDependencies(new_updated_columns);
+                auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns);
                for (const auto & dependency : new_dependencies)
                {
                    if (dependency.kind == ColumnDependency::SKIP_INDEX)
@ -502,7 +506,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
                }

                const ASTPtr select_query = prepareInterpreterSelectQuery(stages_copy, /* dry_run = */ true);
-                InterpreterSelectQuery interpreter{select_query, context, storage, SelectQueryOptions().analyze(/* dry_run = */ false).ignoreLimits()};
+                InterpreterSelectQuery interpreter{select_query, context, storage, metadata_snapshot, SelectQueryOptions().analyze(/* dry_run = */ false).ignoreLimits()};

                auto first_stage_header = interpreter.getSampleBlock();
                auto in = std::make_shared<NullBlockInputStream>(first_stage_header);
@ -524,7 +528,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)

 ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> & prepared_stages, bool dry_run)
 {
-    NamesAndTypesList all_columns = storage->getColumns().getAllPhysical();
+    NamesAndTypesList all_columns = metadata_snapshot->getColumns().getAllPhysical();


    /// Next, for each stage calculate columns changed by this and previous stages.
@ -667,7 +671,7 @@ BlockInputStreamPtr MutationsInterpreter::addStreamsForLaterStages(const std::ve
    return in;
 }

-void MutationsInterpreter::validate(TableStructureReadLockHolder &)
+void MutationsInterpreter::validate()
 {
    const Settings & settings = context.getSettingsRef();

@ -692,7 +696,7 @@ void MutationsInterpreter::validate(TableStructureReadLockHolder &)
    addStreamsForLaterStages(stages, in)->getHeader();
 }

-BlockInputStreamPtr MutationsInterpreter::execute(TableStructureReadLockHolder &)
+BlockInputStreamPtr MutationsInterpreter::execute()
 {
    if (!can_execute)
        throw Exception("Cannot execute mutations interpreter because can_execute flag set to false", ErrorCodes::LOGICAL_ERROR);
@ -729,7 +733,7 @@ size_t MutationsInterpreter::evaluateCommandsSize()

 std::optional<SortDescription> MutationsInterpreter::getStorageSortDescriptionIfPossible(const Block & header) const
 {
-    Names sort_columns = storage->getSortingKeyColumns();
+    Names sort_columns = metadata_snapshot->getSortingKeyColumns();
    SortDescription sort_description;
    size_t sort_columns_size = sort_columns.size();
    sort_description.reserve(sort_columns_size);
--- a/src/Interpreters/MutationsInterpreter.h
+++ b/src/Interpreters/MutationsInterpreter.h
@ -15,7 +15,8 @@ namespace DB
 class Context;

 /// Return false if the data isn't going to be changed by mutations.
-bool isStorageTouchedByMutations(StoragePtr storage, const std::vector<MutationCommand> & commands, Context context_copy);
+bool isStorageTouchedByMutations(
+    StoragePtr storage, const StorageMetadataPtr & metadata_snapshot, const std::vector<MutationCommand> & commands, Context context_copy);

 /// Create an input stream that will read data from storage and apply mutation commands (UPDATEs, DELETEs, MATERIALIZEs)
 /// to this data.
@ -24,14 +25,19 @@ class MutationsInterpreter
 public:
    /// Storage to mutate, array of mutations commands and context. If you really want to execute mutation
    /// use can_execute = true, in other cases (validation, amount of commands) it can be false
-    MutationsInterpreter(StoragePtr storage_, MutationCommands commands_, const Context & context_, bool can_execute_);
+    MutationsInterpreter(
+        StoragePtr storage_,
+        const StorageMetadataPtr & metadata_snapshot_,
+        MutationCommands commands_,
+        const Context & context_,
+        bool can_execute_);

-    void validate(TableStructureReadLockHolder & table_lock_holder);
+    void validate();

    size_t evaluateCommandsSize();

    /// The resulting stream will return blocks containing only changed columns and columns, that we need to recalculate indices.
-    BlockInputStreamPtr execute(TableStructureReadLockHolder & table_lock_holder);
+    BlockInputStreamPtr execute();

    /// Only changed columns.
    const Block & getUpdatedHeader() const;
@ -47,6 +53,7 @@ private:
    std::optional<SortDescription> getStorageSortDescriptionIfPossible(const Block & header) const;

    StoragePtr storage;
+    StorageMetadataPtr metadata_snapshot;
    MutationCommands commands;
    Context context;
    bool can_execute;
--- a/src/Interpreters/SyntaxAnalyzer.cpp
+++ b/src/Interpreters/SyntaxAnalyzer.cpp
@ -714,7 +714,7 @@ void SyntaxAnalyzerResult::collectSourceColumns(bool add_special)
 {
    if (storage)
    {
-        const ColumnsDescription & columns = storage->getColumns();
+        const ColumnsDescription & columns = metadata_snapshot->getColumns();

        auto columns_from_storage = add_special ? columns.getAll() : columns.getAllPhysical();
        if (source_columns.empty())
@ -1005,14 +1005,19 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
    return std::make_shared<const SyntaxAnalyzerResult>(result);
 }

-SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(ASTPtr & query, const NamesAndTypesList & source_columns, ConstStoragePtr storage, bool allow_aggregations) const
+SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
+    ASTPtr & query,
+    const NamesAndTypesList & source_columns,
+    ConstStoragePtr storage,
+    const StorageMetadataPtr & metadata_snapshot,
+    bool allow_aggregations) const
 {
    if (query->as<ASTSelectQuery>())
        throw Exception("Not select analyze for select asts.", ErrorCodes::LOGICAL_ERROR);

    const auto & settings = context.getSettingsRef();

-    SyntaxAnalyzerResult result(source_columns, storage, false);
+    SyntaxAnalyzerResult result(source_columns, storage, metadata_snapshot, false);

    normalize(query, result.aliases, settings);

--- a/src/Interpreters/SyntaxAnalyzer.h
+++ b/src/Interpreters/SyntaxAnalyzer.h
@ -16,10 +16,13 @@ class Context;
 struct Settings;
 struct SelectQueryOptions;
 using Scalars = std::map<String, Block>;
+struct StorageInMemoryMetadata;
+using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;

 struct SyntaxAnalyzerResult
 {
    ConstStoragePtr storage;
+    StorageMetadataPtr metadata_snapshot;
    std::shared_ptr<TableJoin> analyzed_join;

    NamesAndTypesList source_columns;
@ -51,8 +54,13 @@ struct SyntaxAnalyzerResult
    /// Results of scalar sub queries
    Scalars scalars;

-    SyntaxAnalyzerResult(const NamesAndTypesList & source_columns_, ConstStoragePtr storage_ = {}, bool add_special = true)
+    SyntaxAnalyzerResult(
+        const NamesAndTypesList & source_columns_,
+        ConstStoragePtr storage_ = {},
+        const StorageMetadataPtr & metadata_snapshot_ = {},
+        bool add_special = true)
        : storage(storage_)
+        , metadata_snapshot(metadata_snapshot_)
        , source_columns(source_columns_)
    {
        collectSourceColumns(add_special);
@ -86,7 +94,12 @@ public:
    {}

    /// Analyze and rewrite not select query
-    SyntaxAnalyzerResultPtr analyze(ASTPtr & query, const NamesAndTypesList & source_columns_, ConstStoragePtr storage = {}, bool allow_aggregations = false) const;
+    SyntaxAnalyzerResultPtr analyze(
+        ASTPtr & query,
+        const NamesAndTypesList & source_columns_,
+        ConstStoragePtr storage = {},
+        const StorageMetadataPtr & metadata_snapshot = {},
+        bool allow_aggregations = false) const;

    /// Analyze and rewrite select query
    SyntaxAnalyzerResultPtr analyzeSelect(
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@ -457,8 +457,9 @@ void SystemLog<LogElement>::prepareTable()

    if (table)
    {
+        auto metadata_snapshot = table->getInMemoryMetadataPtr();
        const Block expected = LogElement::createBlock();
-        const Block actual = table->getSampleBlockNonMaterialized();
+        const Block actual = metadata_snapshot->getSampleBlockNonMaterialized();

        if (!blocksHaveEqualStructure(actual, expected))
        {
--- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp
+++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp
@ -210,13 +210,11 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
                if (tables_with_columns.empty())
                    throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR);
                has_asterisk = true;
-                break;
            }
            else if (const auto * qa = child->as<ASTQualifiedAsterisk>())
            {
                visit(*qa, child, data); /// check if it's OK before rewrite
                has_asterisk = true;
-                break;
            }
        }

--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@ -351,8 +351,9 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
                {
                    StoragePtr storage = context.executeTableFunction(input_function);
                    auto & input_storage = dynamic_cast<StorageInput &>(*storage);
-                    BlockInputStreamPtr input_stream = std::make_shared<InputStreamFromASTInsertQuery>(ast, istr,
-                        input_storage.getSampleBlock(), context, input_function);
+                    auto input_metadata_snapshot = input_storage.getInMemoryMetadataPtr();
+                    BlockInputStreamPtr input_stream = std::make_shared<InputStreamFromASTInsertQuery>(
+                        ast, istr, input_metadata_snapshot->getSampleBlock(), context, input_function);
                    input_storage.setInputStream(input_stream);
                }
            }
--- a/src/Interpreters/getTableExpressions.cpp
+++ b/src/Interpreters/getTableExpressions.cpp
@ -87,7 +87,8 @@ static NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression
        const auto table_function = table_expression.table_function;
        auto * query_context = const_cast<Context *>(&context.getQueryContext());
        const auto & function_storage = query_context->executeTableFunction(table_function);
-        const auto & columns = function_storage->getColumns();
+        auto function_metadata_snapshot = function_storage->getInMemoryMetadataPtr();
+        const auto & columns = function_metadata_snapshot->getColumns();
        names_and_type_list = columns.getOrdinary();
        materialized = columns.getMaterialized();
        aliases = columns.getAliases();
@ -97,7 +98,8 @@ static NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression
    {
        auto table_id = context.resolveStorageID(table_expression.database_and_table_name);
        const auto & table = DatabaseCatalog::instance().getTable(table_id, context);
-        const auto & columns = table->getColumns();
+        auto table_metadata_snapshot = table->getInMemoryMetadataPtr();
+        const auto & columns = table_metadata_snapshot->getColumns();
        names_and_type_list = columns.getOrdinary();
        materialized = columns.getMaterialized();
        aliases = columns.getAliases();
--- a/src/Interpreters/interpretSubquery.cpp
+++ b/src/Interpreters/interpretSubquery.cpp
@ -90,14 +90,14 @@ std::shared_ptr<InterpreterSelectWithUnionQuery> interpretSubquery(
        {
            auto * query_context = const_cast<Context *>(&context.getQueryContext());
            const auto & storage = query_context->executeTableFunction(table_expression);
-            columns = storage->getColumns().getOrdinary();
+            columns = storage->getInMemoryMetadataPtr()->getColumns().getOrdinary();
            select_query->addTableFunction(*const_cast<ASTPtr *>(&table_expression)); // XXX: const_cast should be avoided!
        }
        else
        {
            auto table_id = context.resolveStorageID(table_expression);
            const auto & storage = DatabaseCatalog::instance().getTable(table_id, context);
-            columns = storage->getColumns().getOrdinary();
+            columns = storage->getInMemoryMetadataPtr()->getColumns().getOrdinary();
            select_query->replaceDatabaseAndTable(table_id);
        }

--- a/src/Processors/Pipe.h
+++ b/src/Processors/Pipe.h
@ -62,12 +62,12 @@ public:

    /// Do not allow to change the table while the processors of pipe are alive.
    /// TODO: move it to pipeline.
-    void addTableLock(const TableStructureReadLockHolder & lock) { table_locks.push_back(lock); }
+    void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); }
    /// These methods are from QueryPipeline. Needed to make conversion from pipeline to pipe possible.
    void addInterpreterContext(std::shared_ptr<Context> context) { interpreter_context.emplace_back(std::move(context)); }
    void addStorageHolder(StoragePtr storage) { storage_holders.emplace_back(std::move(storage)); }

-    const std::vector<TableStructureReadLockHolder> & getTableLocks() const { return table_locks; }
+    const std::vector<TableLockHolder> & getTableLocks() const { return table_locks; }
    const std::vector<std::shared_ptr<Context>> & getContexts() const { return interpreter_context; }
    const std::vector<StoragePtr> & getStorageHolders() const { return storage_holders; }

@ -80,7 +80,7 @@ private:
    /// It is the max number of processors which can be executed in parallel for each step. See QueryPipeline::Streams.
    size_t max_parallel_streams = 0;

-    std::vector<TableStructureReadLockHolder> table_locks;
+    std::vector<TableLockHolder> table_locks;

    /// Some processors may implicitly use Context or temporary Storage created by Interpreter.
    /// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here,
--- a/src/Processors/QueryPipeline.h
+++ b/src/Processors/QueryPipeline.h
@ -7,14 +7,13 @@
 #include <DataStreams/IBlockOutputStream.h>

 #include <Storages/IStorage_fwd.h>
+#include <Storages/TableLockHolder.h>

 namespace DB
 {

-class TableStructureReadLock;
-using TableStructureReadLockPtr = std::shared_ptr<TableStructureReadLock>;
-using TableStructureReadLocks = std::vector<TableStructureReadLockHolder>;

+using TableLockHolders = std::vector<TableLockHolder>;
 class Context;

 class IOutputFormat;
@ -146,7 +145,7 @@ public:

    const Block & getHeader() const { return current_header; }

-    void addTableLock(const TableStructureReadLockHolder & lock) { table_locks.push_back(lock); }
+    void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); }
    void addInterpreterContext(std::shared_ptr<Context> context) { interpreter_context.emplace_back(std::move(context)); }
    void addStorageHolder(StoragePtr storage) { storage_holders.emplace_back(std::move(storage)); }

@ -180,7 +179,7 @@ private:
    /// because QueryPipeline is alive until query is finished.
    std::vector<std::shared_ptr<Context>> interpreter_context;
    std::vector<StoragePtr> storage_holders;
-    TableStructureReadLocks table_locks;
+    TableLockHolders table_locks;

    /// Common header for each stream.
    Block current_header;
--- a/src/Processors/QueryPlan/ReadFromStorageStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromStorageStep.cpp
@ -13,7 +13,8 @@ namespace DB
 {

 ReadFromStorageStep::ReadFromStorageStep(
-    TableStructureReadLockHolder table_lock_,
+    TableLockHolder table_lock_,
+    StorageMetadataPtr & metadata_snapshot_,
    SelectQueryOptions options_,
    StoragePtr storage_,
    const Names & required_columns_,
@ -23,6 +24,7 @@ ReadFromStorageStep::ReadFromStorageStep(
    size_t max_block_size_,
    size_t max_streams_)
    : table_lock(std::move(table_lock_))
+    , metadata_snapshot(metadata_snapshot_)
    , options(std::move(options_))
    , storage(std::move(storage_))
    , required_columns(required_columns_)
@ -35,11 +37,11 @@ ReadFromStorageStep::ReadFromStorageStep(
    /// Note: we read from storage in constructor of step because we don't know real header before reading.
    /// It will be fixed when storage return QueryPlanStep itself.

-    Pipes pipes = storage->read(required_columns, query_info, *context, processing_stage, max_block_size, max_streams);
+    Pipes pipes = storage->read(required_columns, metadata_snapshot, query_info, *context, processing_stage, max_block_size, max_streams);

    if (pipes.empty())
    {
-        Pipe pipe(std::make_shared<NullSource>(storage->getSampleBlockForColumns(required_columns)));
+        Pipe pipe(std::make_shared<NullSource>(metadata_snapshot->getSampleBlockForColumns(required_columns, storage->getVirtuals(), storage->getStorageID())));

        if (query_info.prewhere_info)
        {
--- a/src/Processors/QueryPlan/ReadFromStorageStep.h
+++ b/src/Processors/QueryPlan/ReadFromStorageStep.h
@ -1,6 +1,6 @@
 #include <Processors/QueryPlan/IQueryPlanStep.h>
 #include <Core/QueryProcessingStage.h>
-#include <Storages/TableStructureLockHolder.h>
+#include <Storages/TableLockHolder.h>
 #include <Interpreters/SelectQueryOptions.h>

 namespace DB
@ -9,6 +9,9 @@ namespace DB
 class IStorage;
 using StoragePtr = std::shared_ptr<IStorage>;

+struct StorageInMemoryMetadata;
+using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
+
 struct SelectQueryInfo;

 struct PrewhereInfo;
@ -18,7 +21,8 @@ class ReadFromStorageStep : public IQueryPlanStep
 {
 public:
    ReadFromStorageStep(
-        TableStructureReadLockHolder table_lock,
+        TableLockHolder table_lock,
+        StorageMetadataPtr & metadata_snapshot,
        SelectQueryOptions options,
        StoragePtr storage,
        const Names & required_columns,
@ -35,7 +39,8 @@ public:
    QueryPipelinePtr updatePipeline(QueryPipelines) override;

 private:
-    TableStructureReadLockHolder table_lock;
+    TableLockHolder table_lock;
+    StorageMetadataPtr metadata_snapshot;
    SelectQueryOptions options;

    StoragePtr storage;
--- a/src/Processors/Transforms/CreatingSetsTransform.cpp
+++ b/src/Processors/Transforms/CreatingSetsTransform.cpp
@ -66,7 +66,7 @@ void CreatingSetsTransform::startSubquery(SubqueryForSet & subquery)
    elapsed_nanoseconds = 0;

    if (subquery.table)
-        table_out = subquery.table->write({}, context);
+        table_out = subquery.table->write({}, subquery.table->getInMemoryMetadataPtr(), context);

    done_with_set = !subquery.set;
    done_with_join = !subquery.join;
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@ -232,15 +232,12 @@ HTTPHandler::HTTPHandler(IServer & server_, const std::string & name)


 void HTTPHandler::processQuery(
+    Context & context,
    Poco::Net::HTTPServerRequest & request,
    HTMLForm & params,
    Poco::Net::HTTPServerResponse & response,
    Output & used_output)
 {
-    Context context = server.context();
-
-    CurrentThread::QueryScope query_scope(context);
-
    LOG_TRACE(log, "Request URI: {}", request.getURI());

    std::istream & istr = request.stream();
@ -683,6 +680,11 @@ void HTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
    setThreadName("HTTPHandler");
    ThreadStatus thread_status;

+    /// Should be initialized before anything,
+    /// For correct memory accounting.
+    Context context = server.context();
+    CurrentThread::QueryScope query_scope(context);
+
    Output used_output;

    /// In case of exception, send stack trace to client.
@ -706,7 +708,7 @@ void HTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
            throw Exception("The Transfer-Encoding is not chunked and there is no Content-Length header for POST request", ErrorCodes::HTTP_LENGTH_REQUIRED);
        }

-        processQuery(request, params, response, used_output);
+        processQuery(context, request, params, response, used_output);
        LOG_INFO(log, "Done processing query");
    }
    catch (...)
--- a/src/Server/HTTPHandler.h
+++ b/src/Server/HTTPHandler.h
@ -72,6 +72,7 @@ private:

    /// Also initializes 'used_output'.
    void processQuery(
+        Context & context,
        Poco::Net::HTTPServerRequest & request,
        HTMLForm & params,
        Poco::Net::HTTPServerResponse & response,
--- a/src/Server/MySQLHandler.cpp
+++ b/src/Server/MySQLHandler.cpp
@ -254,7 +254,8 @@ void MySQLHandler::comFieldList(ReadBuffer & payload)
    packet.readPayload(payload);
    String database = connection_context.getCurrentDatabase();
    StoragePtr table_ptr = DatabaseCatalog::instance().getTable({database, packet.table}, connection_context);
-    for (const NameAndTypePair & column: table_ptr->getColumns().getAll())
+    auto metadata_snapshot = table_ptr->getInMemoryMetadataPtr();
+    for (const NameAndTypePair & column : metadata_snapshot->getColumns().getAll())
    {
        ColumnDefinition column_definition(
            database, packet.table, packet.table, column.name, column.name, CharacterSet::binary, 100, ColumnType::MYSQL_TYPE_STRING, 0, 0
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@ -189,6 +189,7 @@ void TCPHandler::runImpl()
                state.logs_queue = std::make_shared<InternalTextLogsQueue>();
                state.logs_queue->max_priority = Poco::Logger::parseLevel(client_logs_level.toString());
                CurrentThread::attachInternalTextLogsQueue(state.logs_queue, client_logs_level);
+                CurrentThread::setFatalErrorCallback([this]{ sendLogs(); });
            }

            query_context->setExternalTablesInitializer([&connection_settings, this] (Context & context)
@ -213,17 +214,18 @@ void TCPHandler::runImpl()
                if (&context != &query_context.value())
                    throw Exception("Unexpected context in Input initializer", ErrorCodes::LOGICAL_ERROR);

+                auto metadata_snapshot = input_storage->getInMemoryMetadataPtr();
                state.need_receive_data_for_input = true;

                /// Send ColumnsDescription for input storage.
                if (client_revision >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA
                    && query_context->getSettingsRef().input_format_defaults_for_omitted_fields)
                {
-                    sendTableColumns(input_storage->getColumns());
+                    sendTableColumns(metadata_snapshot->getColumns());
                }

                /// Send block to the client - input storage structure.
-                state.input_header = input_storage->getSampleBlock();
+                state.input_header = metadata_snapshot->getSampleBlock();
                sendData(state.input_header);
            });

@ -474,7 +476,10 @@ void TCPHandler::processInsertQuery(const Settings & connection_settings)
        if (query_context->getSettingsRef().input_format_defaults_for_omitted_fields)
        {
            if (!table_id.empty())
-                sendTableColumns(DatabaseCatalog::instance().getTable(table_id, *query_context)->getColumns());
+            {
+                auto storage_ptr = DatabaseCatalog::instance().getTable(table_id, *query_context);
+                sendTableColumns(storage_ptr->getInMemoryMetadataPtr()->getColumns());
+            }
        }
    }

@ -952,8 +957,9 @@ bool TCPHandler::receiveData(bool scalar)
                    storage = temporary_table.getTable();
                    query_context->addExternalTable(temporary_id.table_name, std::move(temporary_table));
                }
+                auto metadata_snapshot = storage->getInMemoryMetadataPtr();
                /// The data will be written directly to the table.
-                state.io.out = storage->write(ASTPtr(), *query_context);
+                state.io.out = storage->write(ASTPtr(), metadata_snapshot, *query_context);
            }
            if (state.need_receive_data_for_input)
                state.block_for_input = block;
--- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@ -83,18 +83,29 @@ static void writeBlockConvert(const BlockOutputStreamPtr & out, const Block & bl


 DistributedBlockOutputStream::DistributedBlockOutputStream(
-        const Context & context_, StorageDistributed & storage_, const ASTPtr & query_ast_, const ClusterPtr & cluster_,
-        bool insert_sync_, UInt64 insert_timeout_)
-        : context(context_), storage(storage_), query_ast(query_ast_), query_string(queryToString(query_ast_)),
-        cluster(cluster_), insert_sync(insert_sync_),
-        insert_timeout(insert_timeout_), log(&Poco::Logger::get("DistributedBlockOutputStream"))
+    const Context & context_,
+    StorageDistributed & storage_,
+    const StorageMetadataPtr & metadata_snapshot_,
+    const ASTPtr & query_ast_,
+    const ClusterPtr & cluster_,
+    bool insert_sync_,
+    UInt64 insert_timeout_)
+    : context(context_)
+    , storage(storage_)
+    , metadata_snapshot(metadata_snapshot_)
+    , query_ast(query_ast_)
+    , query_string(queryToString(query_ast_))
+    , cluster(cluster_)
+    , insert_sync(insert_sync_)
+    , insert_timeout(insert_timeout_)
+    , log(&Poco::Logger::get("DistributedBlockOutputStream"))
 {
 }


 Block DistributedBlockOutputStream::getHeader() const
 {
-    return storage.getSampleBlock();
+    return metadata_snapshot->getSampleBlock();
 }


@ -109,7 +120,7 @@ void DistributedBlockOutputStream::write(const Block & block)

    /* They are added by the AddingDefaultBlockOutputStream, and we will get
     * different number of columns eventually */
-    for (const auto & col : storage.getColumns().getMaterialized())
+    for (const auto & col : metadata_snapshot->getColumns().getMaterialized())
    {
        if (ordinary_block.has(col.name))
        {
--- a/src/Storages/Distributed/DistributedBlockOutputStream.h
+++ b/src/Storages/Distributed/DistributedBlockOutputStream.h
@ -2,6 +2,7 @@

 #include <Parsers/formatAST.h>
 #include <DataStreams/IBlockOutputStream.h>
+#include <Storages/StorageInMemoryMetadata.h>
 #include <Core/Block.h>
 #include <Common/PODArray.h>
 #include <Common/Throttler.h>
@ -36,8 +37,14 @@ class StorageDistributed;
 class DistributedBlockOutputStream : public IBlockOutputStream
 {
 public:
-    DistributedBlockOutputStream(const Context & context_, StorageDistributed & storage_, const ASTPtr & query_ast_,
-                                 const ClusterPtr & cluster_, bool insert_sync_, UInt64 insert_timeout_);
+    DistributedBlockOutputStream(
+        const Context & context_,
+        StorageDistributed & storage_,
+        const StorageMetadataPtr & metadata_snapshot_,
+        const ASTPtr & query_ast_,
+        const ClusterPtr & cluster_,
+        bool insert_sync_,
+        UInt64 insert_timeout_);

    Block getHeader() const override;
    void write(const Block & block) override;
@ -79,6 +86,7 @@ private:
 private:
    const Context & context;
    StorageDistributed & storage;
+    StorageMetadataPtr metadata_snapshot;
    ASTPtr query_ast;
    String query_string;
    ClusterPtr cluster;
--- a/src/Storages/IStorage.cpp
+++ b/src/Storages/IStorage.cpp
@ -19,295 +19,15 @@ namespace DB

 namespace ErrorCodes
 {
-    extern const int LOGICAL_ERROR;
-    extern const int COLUMN_QUERIED_MORE_THAN_ONCE;
-    extern const int DUPLICATE_COLUMN;
-    extern const int EMPTY_LIST_OF_COLUMNS_PASSED;
-    extern const int EMPTY_LIST_OF_COLUMNS_QUERIED;
-    extern const int NO_SUCH_COLUMN_IN_TABLE;
-    extern const int NOT_FOUND_COLUMN_IN_BLOCK;
-    extern const int TYPE_MISMATCH;
    extern const int TABLE_IS_DROPPED;
    extern const int NOT_IMPLEMENTED;
    extern const int DEADLOCK_AVOIDED;
 }

-const ColumnsDescription & IStorage::getColumns() const
-{
-    return metadata.columns;
-}
-
-const IndicesDescription & IStorage::getSecondaryIndices() const
-{
-    return metadata.secondary_indices;
-}
-
-bool IStorage::hasSecondaryIndices() const
-{
-    return !metadata.secondary_indices.empty();
-}
-
-const ConstraintsDescription & IStorage::getConstraints() const
-{
-    return metadata.constraints;
-}
-
-Block IStorage::getSampleBlock() const
-{
-    Block res;
-
-    for (const auto & column : getColumns().getAllPhysical())
-        res.insert({column.type->createColumn(), column.type, column.name});
-
-    return res;
-}
-
-Block IStorage::getSampleBlockWithVirtuals() const
-{
-    auto res = getSampleBlock();
-
-    /// Virtual columns must be appended after ordinary, because user can
-    /// override them.
-    for (const auto & column : getVirtuals())
-        res.insert({column.type->createColumn(), column.type, column.name});
-
-    return res;
-}
-
-Block IStorage::getSampleBlockNonMaterialized() const
-{
-    Block res;
-
-    for (const auto & column : getColumns().getOrdinary())
-        res.insert({column.type->createColumn(), column.type, column.name});
-
-    return res;
-}
-
-Block IStorage::getSampleBlockForColumns(const Names & column_names) const
-{
-    Block res;
-
-    std::unordered_map<String, DataTypePtr> columns_map;
-
-    NamesAndTypesList all_columns = getColumns().getAll();
-    for (const auto & elem : all_columns)
-        columns_map.emplace(elem.name, elem.type);
-
-    /// Virtual columns must be appended after ordinary, because user can
-    /// override them.
-    for (const auto & column : getVirtuals())
-        columns_map.emplace(column.name, column.type);
-
-    for (const auto & name : column_names)
-    {
-        auto it = columns_map.find(name);
-        if (it != columns_map.end())
-        {
-            res.insert({it->second->createColumn(), it->second, it->first});
-        }
-        else
-        {
-            throw Exception(
-                "Column " + backQuote(name) + " not found in table " + getStorageID().getNameForLogs(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
-        }
-    }
-
-    return res;
-}
-
-namespace
-{
-#if !defined(ARCADIA_BUILD)
-    using NamesAndTypesMap = google::dense_hash_map<StringRef, const IDataType *, StringRefHash>;
-    using UniqueStrings = google::dense_hash_set<StringRef, StringRefHash>;
-#else
-    using NamesAndTypesMap = google::sparsehash::dense_hash_map<StringRef, const IDataType *, StringRefHash>;
-    using UniqueStrings = google::sparsehash::dense_hash_set<StringRef, StringRefHash>;
-#endif
-
-    String listOfColumns(const NamesAndTypesList & available_columns)
-    {
-        std::stringstream ss;
-        for (auto it = available_columns.begin(); it != available_columns.end(); ++it)
-        {
-            if (it != available_columns.begin())
-                ss << ", ";
-            ss << it->name;
-        }
-        return ss.str();
-    }
-
-    NamesAndTypesMap getColumnsMap(const NamesAndTypesList & columns)
-    {
-        NamesAndTypesMap res;
-        res.set_empty_key(StringRef());
-
-        for (const auto & column : columns)
-            res.insert({column.name, column.type.get()});
-
-        return res;
-    }
-
-    UniqueStrings initUniqueStrings()
-    {
-        UniqueStrings strings;
-        strings.set_empty_key(StringRef());
-        return strings;
-    }
-}
-
-void IStorage::check(const Names & column_names, bool include_virtuals) const
-{
-    NamesAndTypesList available_columns = getColumns().getAllPhysical();
-    if (include_virtuals)
-    {
-        auto virtuals = getVirtuals();
-        available_columns.insert(available_columns.end(), virtuals.begin(), virtuals.end());
-    }
-
-    const String list_of_columns = listOfColumns(available_columns);
-
-    if (column_names.empty())
-        throw Exception("Empty list of columns queried. There are columns: " + list_of_columns, ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED);
-
-    const auto columns_map = getColumnsMap(available_columns);
-
-    auto unique_names = initUniqueStrings();
-    for (const auto & name : column_names)
-    {
-        if (columns_map.end() == columns_map.find(name))
-            throw Exception(
-                "There is no column with name " + backQuote(name) + " in table " + getStorageID().getNameForLogs() + ". There are columns: " + list_of_columns,
-                ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
-
-        if (unique_names.end() != unique_names.find(name))
-            throw Exception("Column " + name + " queried more than once", ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE);
-        unique_names.insert(name);
-    }
-}
-
-void IStorage::check(const NamesAndTypesList & provided_columns) const
-{
-    const NamesAndTypesList & available_columns = getColumns().getAllPhysical();
-    const auto columns_map = getColumnsMap(available_columns);
-
-    auto unique_names = initUniqueStrings();
-    for (const NameAndTypePair & column : provided_columns)
-    {
-        auto it = columns_map.find(column.name);
-        if (columns_map.end() == it)
-            throw Exception(
-                "There is no column with name " + column.name + ". There are columns: " + listOfColumns(available_columns),
-                ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
-
-        if (!column.type->equals(*it->second))
-            throw Exception(
-                "Type mismatch for column " + column.name + ". Column has type " + it->second->getName() + ", got type "
-                    + column.type->getName(),
-                ErrorCodes::TYPE_MISMATCH);
-
-        if (unique_names.end() != unique_names.find(column.name))
-            throw Exception("Column " + column.name + " queried more than once", ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE);
-        unique_names.insert(column.name);
-    }
-}
-
-void IStorage::check(const NamesAndTypesList & provided_columns, const Names & column_names) const
-{
-    const NamesAndTypesList & available_columns = getColumns().getAllPhysical();
-    const auto available_columns_map = getColumnsMap(available_columns);
-    const auto & provided_columns_map = getColumnsMap(provided_columns);
-
-    if (column_names.empty())
-        throw Exception(
-            "Empty list of columns queried. There are columns: " + listOfColumns(available_columns),
-            ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED);
-
-    auto unique_names = initUniqueStrings();
-    for (const String & name : column_names)
-    {
-        auto it = provided_columns_map.find(name);
-        if (provided_columns_map.end() == it)
-            continue;
-
-        auto jt = available_columns_map.find(name);
-        if (available_columns_map.end() == jt)
-            throw Exception(
-                "There is no column with name " + name + ". There are columns: " + listOfColumns(available_columns),
-                ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
-
-        if (!it->second->equals(*jt->second))
-            throw Exception(
-                "Type mismatch for column " + name + ". Column has type " + jt->second->getName() + ", got type " + it->second->getName(),
-                ErrorCodes::TYPE_MISMATCH);
-
-        if (unique_names.end() != unique_names.find(name))
-            throw Exception("Column " + name + " queried more than once", ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE);
-        unique_names.insert(name);
-    }
-}
-
-void IStorage::check(const Block & block, bool need_all) const
-{
-    const NamesAndTypesList & available_columns = getColumns().getAllPhysical();
-    const auto columns_map = getColumnsMap(available_columns);
-
-    NameSet names_in_block;
-
-    block.checkNumberOfRows();
-
-    for (const auto & column : block)
-    {
-        if (names_in_block.count(column.name))
-            throw Exception("Duplicate column " + column.name + " in block", ErrorCodes::DUPLICATE_COLUMN);
-
-        names_in_block.insert(column.name);
-
-        auto it = columns_map.find(column.name);
-        if (columns_map.end() == it)
-            throw Exception(
-                "There is no column with name " + column.name + ". There are columns: " + listOfColumns(available_columns),
-                ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
-
-        if (!column.type->equals(*it->second))
-            throw Exception(
-                "Type mismatch for column " + column.name + ". Column has type " + it->second->getName() + ", got type "
-                    + column.type->getName(),
-                ErrorCodes::TYPE_MISMATCH);
-    }
-
-    if (need_all && names_in_block.size() < columns_map.size())
-    {
-        for (const auto & available_column : available_columns)
-        {
-            if (!names_in_block.count(available_column.name))
-                throw Exception("Expected column " + available_column.name, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
-        }
-    }
-}
-
-void IStorage::setColumns(ColumnsDescription columns_)
-{
-    if (columns_.getAllPhysical().empty())
-        throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED);
-    metadata.columns = std::move(columns_);
-}
-
-void IStorage::setSecondaryIndices(IndicesDescription secondary_indices_)
-{
-    metadata.secondary_indices = std::move(secondary_indices_);
-}
-
-void IStorage::setConstraints(ConstraintsDescription constraints_)
-{
-    metadata.constraints = std::move(constraints_);
-}
-
-bool IStorage::isVirtualColumn(const String & column_name) const
+bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const
 {
    /// Virtual column maybe overriden by real column
-    return !getColumns().has(column_name) && getVirtuals().contains(column_name);
+    return !metadata_snapshot->getColumns().has(column_name) && getVirtuals().contains(column_name);
 }

 RWLockImpl::LockHolder IStorage::tryLockTimed(
@ -326,63 +46,48 @@ RWLockImpl::LockHolder IStorage::tryLockTimed(
    return lock_holder;
 }

-TableStructureReadLockHolder IStorage::lockStructureForShare(bool will_add_new_data, const String & query_id, const SettingSeconds & acquire_timeout)
+TableLockHolder IStorage::lockForShare(const String & query_id, const SettingSeconds & acquire_timeout)
 {
-    TableStructureReadLockHolder result;
-    if (will_add_new_data)
-        result.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Read, query_id, acquire_timeout);
-    result.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Read, query_id, acquire_timeout);
+    TableLockHolder result = tryLockTimed(drop_lock, RWLockImpl::Read, query_id, acquire_timeout);

    if (is_dropped)
        throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED);
+
    return result;
 }

-TableStructureWriteLockHolder IStorage::lockAlterIntention(const String & query_id, const SettingSeconds & acquire_timeout)
+TableLockHolder IStorage::lockForAlter(const String & query_id, const SettingSeconds & acquire_timeout)
 {
-    TableStructureWriteLockHolder result;
-    result.alter_intention_lock = tryLockTimed(alter_intention_lock, RWLockImpl::Write, query_id, acquire_timeout);
+    TableLockHolder result = tryLockTimed(alter_lock, RWLockImpl::Write, query_id, acquire_timeout);

    if (is_dropped)
        throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED);
+
    return result;
 }

-void IStorage::lockStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id, const SettingSeconds & acquire_timeout)
-{
-    if (!lock_holder.alter_intention_lock)
-        throw Exception("Alter intention lock for table " + getStorageID().getNameForLogs() + " was not taken. This is a bug.", ErrorCodes::LOGICAL_ERROR);

-    if (!lock_holder.new_data_structure_lock)
-        lock_holder.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Write, query_id, acquire_timeout);
-    lock_holder.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Write, query_id, acquire_timeout);
-}
-
-TableStructureWriteLockHolder IStorage::lockExclusively(const String & query_id, const SettingSeconds & acquire_timeout)
+TableExclusiveLockHolder IStorage::lockExclusively(const String & query_id, const SettingSeconds & acquire_timeout)
 {
-    TableStructureWriteLockHolder result;
-    result.alter_intention_lock = tryLockTimed(alter_intention_lock, RWLockImpl::Write, query_id, acquire_timeout);
+    TableExclusiveLockHolder result;
+    result.alter_lock = tryLockTimed(alter_lock, RWLockImpl::Write, query_id, acquire_timeout);

    if (is_dropped)
        throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED);

-    result.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Write, query_id, acquire_timeout);
-    result.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Write, query_id, acquire_timeout);
+    result.drop_lock = tryLockTimed(drop_lock, RWLockImpl::Write, query_id, acquire_timeout);

    return result;
 }

 void IStorage::alter(
-    const AlterCommands & params,
-    const Context & context,
-    TableStructureWriteLockHolder & table_lock_holder)
+    const AlterCommands & params, const Context & context, TableLockHolder &)
 {
-    lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
    auto table_id = getStorageID();
    StorageInMemoryMetadata new_metadata = getInMemoryMetadata();
    params.apply(new_metadata, context);
    DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata);
-    setColumns(std::move(new_metadata.columns));
+    setInMemoryMetadata(new_metadata);
 }


@ -415,277 +120,4 @@ NamesAndTypesList IStorage::getVirtuals() const
    return {};
 }

-const KeyDescription & IStorage::getPartitionKey() const
-{
-    return metadata.partition_key;
-}
-
-void IStorage::setPartitionKey(const KeyDescription & partition_key_)
-{
-    metadata.partition_key = partition_key_;
-}
-
-bool IStorage::isPartitionKeyDefined() const
-{
-    return metadata.partition_key.definition_ast != nullptr;
-}
-
-bool IStorage::hasPartitionKey() const
-{
-    return !metadata.partition_key.column_names.empty();
-}
-
-Names IStorage::getColumnsRequiredForPartitionKey() const
-{
-    if (hasPartitionKey())
-        return metadata.partition_key.expression->getRequiredColumns();
-    return {};
-}
-
-const KeyDescription & IStorage::getSortingKey() const
-{
-    return metadata.sorting_key;
-}
-
-void IStorage::setSortingKey(const KeyDescription & sorting_key_)
-{
-    metadata.sorting_key = sorting_key_;
-}
-
-bool IStorage::isSortingKeyDefined() const
-{
-    return metadata.sorting_key.definition_ast != nullptr;
-}
-
-bool IStorage::hasSortingKey() const
-{
-    return !metadata.sorting_key.column_names.empty();
-}
-
-Names IStorage::getColumnsRequiredForSortingKey() const
-{
-    if (hasSortingKey())
-        return metadata.sorting_key.expression->getRequiredColumns();
-    return {};
-}
-
-Names IStorage::getSortingKeyColumns() const
-{
-    if (hasSortingKey())
-        return metadata.sorting_key.column_names;
-    return {};
-}
-
-const KeyDescription & IStorage::getPrimaryKey() const
-{
-    return metadata.primary_key;
-}
-
-void IStorage::setPrimaryKey(const KeyDescription & primary_key_)
-{
-    metadata.primary_key = primary_key_;
-}
-
-bool IStorage::isPrimaryKeyDefined() const
-{
-    return metadata.primary_key.definition_ast != nullptr;
-}
-
-bool IStorage::hasPrimaryKey() const
-{
-    return !metadata.primary_key.column_names.empty();
-}
-
-Names IStorage::getColumnsRequiredForPrimaryKey() const
-{
-    if (hasPrimaryKey())
-        return metadata.primary_key.expression->getRequiredColumns();
-    return {};
-}
-
-Names IStorage::getPrimaryKeyColumns() const
-{
-    if (!metadata.primary_key.column_names.empty())
-        return metadata.primary_key.column_names;
-    return {};
-}
-
-const KeyDescription & IStorage::getSamplingKey() const
-{
-    return metadata.sampling_key;
-}
-
-void IStorage::setSamplingKey(const KeyDescription & sampling_key_)
-{
-    metadata.sampling_key = sampling_key_;
-}
-
-
-bool IStorage::isSamplingKeyDefined() const
-{
-    return metadata.sampling_key.definition_ast != nullptr;
-}
-
-bool IStorage::hasSamplingKey() const
-{
-    return !metadata.sampling_key.column_names.empty();
-}
-
-Names IStorage::getColumnsRequiredForSampling() const
-{
-    if (hasSamplingKey())
-        return metadata.sampling_key.expression->getRequiredColumns();
-    return {};
-}
-
-TTLTableDescription IStorage::getTableTTLs() const
-{
-    std::lock_guard lock(ttl_mutex);
-    return metadata.table_ttl;
-}
-
-void IStorage::setTableTTLs(const TTLTableDescription & table_ttl_)
-{
-    std::lock_guard lock(ttl_mutex);
-    metadata.table_ttl = table_ttl_;
-}
-
-bool IStorage::hasAnyTableTTL() const
-{
-    return hasAnyMoveTTL() || hasRowsTTL();
-}
-
-TTLColumnsDescription IStorage::getColumnTTLs() const
-{
-    std::lock_guard lock(ttl_mutex);
-    return metadata.column_ttls_by_name;
-}
-
-void IStorage::setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_)
-{
-    std::lock_guard lock(ttl_mutex);
-    metadata.column_ttls_by_name = column_ttls_by_name_;
-}
-
-bool IStorage::hasAnyColumnTTL() const
-{
-    std::lock_guard lock(ttl_mutex);
-    return !metadata.column_ttls_by_name.empty();
-}
-
-TTLDescription IStorage::getRowsTTL() const
-{
-    std::lock_guard lock(ttl_mutex);
-    return metadata.table_ttl.rows_ttl;
-}
-
-bool IStorage::hasRowsTTL() const
-{
-    std::lock_guard lock(ttl_mutex);
-    return metadata.table_ttl.rows_ttl.expression != nullptr;
-}
-
-TTLDescriptions IStorage::getMoveTTLs() const
-{
-    std::lock_guard lock(ttl_mutex);
-    return metadata.table_ttl.move_ttl;
-}
-
-bool IStorage::hasAnyMoveTTL() const
-{
-    std::lock_guard lock(ttl_mutex);
-    return !metadata.table_ttl.move_ttl.empty();
-}
-
-
-ColumnDependencies IStorage::getColumnDependencies(const NameSet & updated_columns) const
-{
-    if (updated_columns.empty())
-        return {};
-
-    ColumnDependencies res;
-
-    NameSet indices_columns;
-    NameSet required_ttl_columns;
-    NameSet updated_ttl_columns;
-
-    auto add_dependent_columns = [&updated_columns](const auto & expression, auto & to_set)
-    {
-        auto requiered_columns = expression->getRequiredColumns();
-        for (const auto & dependency : requiered_columns)
-        {
-            if (updated_columns.count(dependency))
-            {
-                to_set.insert(requiered_columns.begin(), requiered_columns.end());
-                return true;
-            }
-        }
-
-        return false;
-    };
-
-    for (const auto & index : getSecondaryIndices())
-        add_dependent_columns(index.expression, indices_columns);
-
-    if (hasRowsTTL())
-    {
-        auto rows_expression = getRowsTTL().expression;
-        if (add_dependent_columns(rows_expression, required_ttl_columns))
-        {
-            /// Filter all columns, if rows TTL expression have to be recalculated.
-            for (const auto & column : getColumns().getAllPhysical())
-                updated_ttl_columns.insert(column.name);
-        }
-    }
-
-    for (const auto & [name, entry] : getColumnTTLs())
-    {
-        if (add_dependent_columns(entry.expression, required_ttl_columns))
-            updated_ttl_columns.insert(name);
-    }
-
-    for (const auto & entry : getMoveTTLs())
-        add_dependent_columns(entry.expression, required_ttl_columns);
-
-    for (const auto & column : indices_columns)
-        res.emplace(column, ColumnDependency::SKIP_INDEX);
-    for (const auto & column : required_ttl_columns)
-        res.emplace(column, ColumnDependency::TTL_EXPRESSION);
-    for (const auto & column : updated_ttl_columns)
-        res.emplace(column, ColumnDependency::TTL_TARGET);
-
-    return res;
-
-}
-
-ASTPtr IStorage::getSettingsChanges() const
-{
-    if (metadata.settings_changes)
-        return metadata.settings_changes->clone();
-    return nullptr;
-}
-
-void IStorage::setSettingsChanges(const ASTPtr & settings_changes_)
-{
-    if (settings_changes_)
-        metadata.settings_changes = settings_changes_->clone();
-    else
-        metadata.settings_changes = nullptr;
-}
-
-const SelectQueryDescription & IStorage::getSelectQuery() const
-{
-    return metadata.select;
-}
-
-void IStorage::setSelectQuery(const SelectQueryDescription & select_)
-{
-    metadata.select = select_;
-}
-
-bool IStorage::hasSelectQuery() const
-{
-    return metadata.select.select_query != nullptr;
-}
-
 }
--- a/src/Storages/IStorage.h
+++ b/src/Storages/IStorage.h
@ -8,7 +8,7 @@
 #include <Storages/IStorage_fwd.h>
 #include <Interpreters/StorageID.h>
 #include <Storages/SelectQueryInfo.h>
-#include <Storages/TableStructureLockHolder.h>
+#include <Storages/TableLockHolder.h>
 #include <Storages/CheckResults.h>
 #include <Storages/StorageInMemoryMetadata.h>
 #include <Storages/ColumnDependency.h>
@ -80,9 +80,10 @@ class IStorage : public std::enable_shared_from_this<IStorage>, public TypePromo
 {
 public:
    IStorage() = delete;
-    /// Storage fields should be initialized in separate methods like setColumns
-    /// or setTableTTLs.
-    explicit IStorage(StorageID storage_id_) : storage_id(std::move(storage_id_)) {} //-V730
+    /// Storage metadata can be set separately in setInMemoryMetadata method
+    explicit IStorage(StorageID storage_id_)
+        : storage_id(std::move(storage_id_))
+        , metadata(std::make_unique<StorageInMemoryMetadata>()) {} //-V730

    virtual ~IStorage() = default;
    IStorage(const IStorage &) = delete;
@ -101,7 +102,7 @@ public:
    virtual bool isView() const { return false; }

    /// Returns true if the storage supports queries with the SAMPLE section.
-    virtual bool supportsSampling() const { return hasSamplingKey(); }
+    virtual bool supportsSampling() const { return getInMemoryMetadataPtr()->hasSamplingKey(); }

    /// Returns true if the storage supports queries with the FINAL section.
    virtual bool supportsFinal() const { return false; }
@ -129,58 +130,32 @@ public:
    /// Example is StorageSystemNumbers.
    virtual bool hasEvenlyDistributedRead() const { return false; }

-    /// Returns true if there is set table TTL, any column TTL or any move TTL.
-    virtual bool hasAnyTTL() const { return hasAnyColumnTTL() || hasAnyTableTTL(); }

    /// Optional size information of each physical column.
    /// Currently it's only used by the MergeTree family for query optimizations.
    using ColumnSizeByName = std::unordered_map<std::string, ColumnSize>;
    virtual ColumnSizeByName getColumnSizes() const { return {}; }

-public: /// thread-unsafe part. lockStructure must be acquired
+public:

-    const ColumnsDescription & getColumns() const; /// returns combined set of columns
-    void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones.
+    /// Get mutable version (snapshot) of storage metadata. Metadata object is
+    /// multiversion, so it can be concurrently changed, but returned copy can be
+    /// used without any locks.
+    StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata.get(); }

-    void setSecondaryIndices(IndicesDescription secondary_indices_);
-    const IndicesDescription & getSecondaryIndices() const;
-    /// Has at least one non primary index
-    bool hasSecondaryIndices() const;
+    /// Get immutable version (snapshot) of storage metadata. Metadata object is
+    /// multiversion, so it can be concurrently changed, but returned copy can be
+    /// used without any locks.
+    StorageMetadataPtr getInMemoryMetadataPtr() const { return metadata.get(); }

-    const ConstraintsDescription & getConstraints() const;
-    void setConstraints(ConstraintsDescription constraints_);
+    /// Update storage metadata. Used in ALTER or initialization of Storage.
+    /// Metadata object is multiversion, so this method can be called without
+    /// any locks.
+    void setInMemoryMetadata(const StorageInMemoryMetadata & metadata_)
+    {
+        metadata.set(std::make_unique<StorageInMemoryMetadata>(metadata_));
+    }

-    /// Storage settings
-    ASTPtr getSettingsChanges() const;
-    void setSettingsChanges(const ASTPtr & settings_changes_);
-    bool hasSettingsChanges() const { return metadata.settings_changes != nullptr; }
-
-    /// Select query for *View storages.
-    const SelectQueryDescription & getSelectQuery() const;
-    void setSelectQuery(const SelectQueryDescription & select_);
-    bool hasSelectQuery() const;
-
-    StorageInMemoryMetadata getInMemoryMetadata() const { return metadata; }
-
-    Block getSampleBlock() const; /// ordinary + materialized.
-    Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals.
-    Block getSampleBlockNonMaterialized() const; /// ordinary.
-    Block getSampleBlockForColumns(const Names & column_names) const; /// ordinary + materialized + aliases + virtuals.
-
-    /// Verify that all the requested names are in the table and are set correctly:
-    /// list of names is not empty and the names do not repeat.
-    void check(const Names & column_names, bool include_virtuals = false) const;
-
-    /// Check that all the requested names are in the table and have the correct types.
-    void check(const NamesAndTypesList & columns) const;
-
-    /// Check that all names from the intersection of `names` and `columns` are in the table and have the same types.
-    void check(const NamesAndTypesList & columns, const Names & column_names) const;
-
-    /// Check that the data block contains all the columns of the table with the correct types,
-    /// contains only the columns of the table, and all the columns are different.
-    /// If |need_all| is set, then checks that all the columns of the table are in the block.
-    void check(const Block & block, bool need_all = false) const;

    /// Return list of virtual columns (like _part, _table, etc). In the vast
    /// majority of cases virtual columns are static constant part of Storage
@ -198,36 +173,39 @@ protected:

    /// Returns whether the column is virtual - by default all columns are real.
    /// Initially reserved virtual column name may be shadowed by real column.
-    bool isVirtualColumn(const String & column_name) const;
+    bool isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const;


 private:
    StorageID storage_id;
    mutable std::mutex id_mutex;

-    /// TODO (alesap) just use multiversion for atomic metadata
-    mutable std::mutex ttl_mutex;
-    StorageInMemoryMetadata metadata;
+    /// Multiversion storage metadata. Allows reading/writing storage metadata
+    /// without locks.
+    MultiVersionStorageMetadataPtr metadata;
 private:
    RWLockImpl::LockHolder tryLockTimed(
        const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const SettingSeconds & acquire_timeout) const;

 public:
-    /// Acquire this lock if you need the table structure to remain constant during the execution of
-    /// the query. If will_add_new_data is true, this means that the query will add new data to the table
-    /// (INSERT or a parts merge).
-    TableStructureReadLockHolder lockStructureForShare(bool will_add_new_data, const String & query_id, const SettingSeconds & acquire_timeout);
+    /// Lock table for share. This lock must be acquired if you want to be sure
+    /// that the table will not be dropped while you are holding this lock. It's
+    /// used in a variety of cases, from SELECT queries to background merges in
+    /// MergeTree.
+    TableLockHolder lockForShare(const String & query_id, const SettingSeconds & acquire_timeout);

-    /// Acquire this lock at the start of ALTER to lock out other ALTERs and make sure that only you
-    /// can modify the table structure. It can later be upgraded to the exclusive lock.
-    TableStructureWriteLockHolder lockAlterIntention(const String & query_id, const SettingSeconds & acquire_timeout);
+    /// Lock table for alter. This lock must be acquired in ALTER queries to be
+    /// sure that only one ALTER executes at a time. Doesn't affect the share lock.
+    TableLockHolder lockForAlter(const String & query_id, const SettingSeconds & acquire_timeout);

-    /// Upgrade alter intention lock to the full exclusive structure lock. This is done by ALTER queries
-    /// to ensure that no other query uses the table structure and it can be safely changed.
-    void lockStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id, const SettingSeconds & acquire_timeout);
-
-    /// Acquire the full exclusive lock immediately. No other queries can run concurrently.
-    TableStructureWriteLockHolder lockExclusively(const String & query_id, const SettingSeconds & acquire_timeout);
+    /// Lock table exclusively. This lock must be acquired if you want to be
+    /// sure that no other thread (SELECT, merge, ALTER, etc.) is doing anything
+    /// with the table. For example, it allows waiting for all threads to finish
+    /// before a DROP or TRUNCATE query.
+    ///
+    /// NOTE: You have to be 100% sure that you need this lock. It's extremely
+    /// heavyweight and makes the table unresponsive.
+    TableExclusiveLockHolder lockExclusively(const String & query_id, const SettingSeconds & acquire_timeout);

    /** Returns stage to which query is going to be processed in read() function.
      * (Normally, the function only reads the columns from the list, but in other cases,
@ -293,10 +271,14 @@ public:
      * num_streams - a recommendation, how many streams to return,
      *  if the storage can return a different number of streams.
      *
-      * It is guaranteed that the structure of the table will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP).
+      * metadata_snapshot is a consistent snapshot of table metadata; it should be
+      * passed to all parts of the returned pipeline. Storage metadata can be
+      * changed during the lifetime of the returned pipeline, but the snapshot is
+      * guaranteed to be immutable.
      */
    virtual Pipes read(
        const Names & /*column_names*/,
+        const StorageMetadataPtr & /*metadata_snapshot*/,
        const SelectQueryInfo & /*query_info*/,
        const Context & /*context*/,
        QueryProcessingStage::Enum /*processed_stage*/,
@ -310,10 +292,14 @@ public:
      * Receives a description of the query, which can contain information about the data write method.
      * Returns an object by which you can write data sequentially.
      *
-      * It is guaranteed that the table structure will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP).
+      * metadata_snapshot is a consistent snapshot of table metadata; it should be
+      * passed to all parts of the returned streams. Storage metadata can be
+      * changed during the lifetime of the returned streams, but the snapshot is
+      * guaranteed to be immutable.
      */
    virtual BlockOutputStreamPtr write(
        const ASTPtr & /*query*/,
+        const StorageMetadataPtr & /*metadata_snapshot*/,
        const Context & /*context*/)
    {
        throw Exception("Method write is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
@ -328,9 +314,13 @@ public:
    virtual void drop() {}

    /** Clear the table data and leave it empty.
-      * Must be called under lockForAlter.
+      * Must be called under exclusive lock (lockExclusively).
      */
-    virtual void truncate(const ASTPtr & /*query*/, const Context & /* context */, TableStructureWriteLockHolder &)
+    virtual void truncate(
+        const ASTPtr & /*query*/,
+        const StorageMetadataPtr & /* metadata_snapshot */,
+        const Context & /* context */,
+        TableExclusiveLockHolder &)
    {
        throw Exception("Truncate is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }
@ -352,11 +342,10 @@ public:
     */
    virtual void renameInMemory(const StorageID & new_table_id);

-    /** ALTER tables in the form of column changes that do not affect the change to Storage or its parameters.
-      * This method must fully execute the ALTER query, taking care of the locks itself.
-      * To update the table metadata on disk, this method should call InterpreterAlterQuery::updateMetadata.
+    /** ALTER tables in the form of column changes that do not affect the change
+      * to Storage or its parameters. Executes under alter lock (lockForAlter).
      */
-    virtual void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder);
+    virtual void alter(const AlterCommands & params, const Context & context, TableLockHolder & alter_lock_holder);

    /** Checks that alter commands can be applied to storage. For example, columns can be modified,
      * or primary key can be changes, etc.
@ -366,7 +355,7 @@ public:
    /** ALTER tables with regard to its partitions.
      * Should handle locks for each command on its own.
      */
-    virtual void alterPartition(const ASTPtr & /* query */, const PartitionCommands & /* commands */, const Context & /* context */)
+    virtual void alterPartition(const ASTPtr & /* query */, const StorageMetadataPtr & /* metadata_snapshot */, const PartitionCommands & /* commands */, const Context & /* context */)
    {
        throw Exception("Partition operations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }
@ -374,7 +363,13 @@ public:
    /** Perform any background work. For example, combining parts in a MergeTree type table.
      * Returns whether any work has been done.
      */
-    virtual bool optimize(const ASTPtr & /*query*/, const ASTPtr & /*partition*/, bool /*final*/, bool /*deduplicate*/, const Context & /*context*/)
+    virtual bool optimize(
+        const ASTPtr & /*query*/,
+        const StorageMetadataPtr & /*metadata_snapshot*/,
+        const ASTPtr & /*partition*/,
+        bool /*final*/,
+        bool /*deduplicate*/,
+        const Context & /*context*/)
    {
        throw Exception("Method optimize is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }
@ -421,7 +416,7 @@ public:
    virtual bool supportsIndexForIn() const { return false; }

    /// Provides a hint that the storage engine may evaluate the IN-condition by using an index.
-    virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */, const Context & /* query_context */) const { return false; }
+    virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */, const Context & /* query_context */, const StorageMetadataPtr & /* metadata_snapshot */) const { return false; }

    /// Checks validity of the data
    virtual CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) { throw Exception("Check query is not supported for " + getName() + " storage", ErrorCodes::NOT_IMPLEMENTED); }
@ -439,99 +434,9 @@ public:
    /// Returns data paths if storage supports it, empty vector otherwise.
    virtual Strings getDataPaths() const { return {}; }

-    /// Returns structure with partition key.
-    const KeyDescription & getPartitionKey() const;
-    /// Set partition key for storage (methods bellow, are just wrappers for this
-    /// struct).
-    void setPartitionKey(const KeyDescription & partition_key_);
-    /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none.
-    ASTPtr getPartitionKeyAST() const { return metadata.partition_key.definition_ast; }
-    /// Storage has user-defined (in CREATE query) partition key.
-    bool isPartitionKeyDefined() const;
-    /// Storage has partition key.
-    bool hasPartitionKey() const;
-    /// Returns column names that need to be read to calculate partition key.
-    Names getColumnsRequiredForPartitionKey() const;
-
-
-    /// Returns structure with sorting key.
-    const KeyDescription & getSortingKey() const;
-    /// Set sorting key for storage (methods bellow, are just wrappers for this
-    /// struct).
-    void setSortingKey(const KeyDescription & sorting_key_);
-    /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none.
-    ASTPtr getSortingKeyAST() const { return metadata.sorting_key.definition_ast; }
-    /// Storage has user-defined (in CREATE query) sorting key.
-    bool isSortingKeyDefined() const;
-    /// Storage has sorting key. It means, that it contains at least one column.
-    bool hasSortingKey() const;
-    /// Returns column names that need to be read to calculate sorting key.
-    Names getColumnsRequiredForSortingKey() const;
-    /// Returns columns names in sorting key specified by user in ORDER BY
-    /// expression. For example: 'a', 'x * y', 'toStartOfMonth(date)', etc.
-    Names getSortingKeyColumns() const;
-
-    /// Returns structure with primary key.
-    const KeyDescription & getPrimaryKey() const;
-    /// Set primary key for storage (methods bellow, are just wrappers for this
-    /// struct).
-    void setPrimaryKey(const KeyDescription & primary_key_);
-    /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none.
-    ASTPtr getPrimaryKeyAST() const { return metadata.primary_key.definition_ast; }
-    /// Storage has user-defined (in CREATE query) sorting key.
-    bool isPrimaryKeyDefined() const;
-    /// Storage has primary key (maybe part of some other key). It means, that
-    /// it contains at least one column.
-    bool hasPrimaryKey() const;
-    /// Returns column names that need to be read to calculate primary key.
-    Names getColumnsRequiredForPrimaryKey() const;
-    /// Returns columns names in sorting key specified by. For example: 'a', 'x
-    /// * y', 'toStartOfMonth(date)', etc.
-    Names getPrimaryKeyColumns() const;
-
-    /// Returns structure with sampling key.
-    const KeyDescription & getSamplingKey() const;
-    /// Set sampling key for storage (methods bellow, are just wrappers for this
-    /// struct).
-    void setSamplingKey(const KeyDescription & sampling_key_);
-    /// Returns sampling expression AST for storage or nullptr if there is none.
-    ASTPtr getSamplingKeyAST() const { return metadata.sampling_key.definition_ast; }
-    /// Storage has user-defined (in CREATE query) sampling key.
-    bool isSamplingKeyDefined() const;
-    /// Storage has sampling key.
-    bool hasSamplingKey() const;
-    /// Returns column names that need to be read to calculate sampling key.
-    Names getColumnsRequiredForSampling() const;
-
-    /// Returns column names that need to be read for FINAL to work.
-    Names getColumnsRequiredForFinal() const { return getColumnsRequiredForSortingKey(); }
-
-    /// Returns columns, which will be needed to calculate dependencies (skip
-    /// indices, TTL expressions) if we update @updated_columns set of columns.
-    ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const;
-
    /// Returns storage policy if storage supports it.
    virtual StoragePolicyPtr getStoragePolicy() const { return {}; }

-    /// Common tables TTLs (for rows and moves).
-    TTLTableDescription getTableTTLs() const;
-    void setTableTTLs(const TTLTableDescription & table_ttl_);
-    bool hasAnyTableTTL() const;
-
-    /// Separate TTLs for columns.
-    TTLColumnsDescription getColumnTTLs() const;
-    void setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_);
-    bool hasAnyColumnTTL() const;
-
-    /// Just wrapper for table TTLs, return rows part of table TTLs.
-    TTLDescription getRowsTTL() const;
-    bool hasRowsTTL() const;
-
-    /// Just wrapper for table TTLs, return moves (to disks or volumes) parts of
-    /// table TTL.
-    TTLDescriptions getMoveTTLs() const;
-    bool hasAnyMoveTTL() const;
-
    /// If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it.
    /// Used for:
    /// - Simple count() opimization
@ -558,22 +463,18 @@ public:
    }

 private:
-    /// You always need to take the next three locks in this order.
+    /// Lock required for alter queries (lockForAlter). Always taken for write
+    /// (it could actually be replaced with std::mutex, but for consistency we use
+    /// RWLock). Ensures that only one alter query executes at a time. It should
+    /// also be taken by DROP-like queries, to be sure that all alters have
+    /// finished.
+    mutable RWLock alter_lock = RWLockImpl::create();

-    /// If you hold this lock exclusively, you can be sure that no other structure modifying queries
-    /// (e.g. ALTER, DROP) are concurrently executing. But queries that only read table structure
-    /// (e.g. SELECT, INSERT) can continue to execute.
-    mutable RWLock alter_intention_lock = RWLockImpl::create();
-
-    /// It is taken for share for the entire INSERT query and the entire merge of the parts (for MergeTree).
-    /// ALTER COLUMN queries acquire an exclusive lock to ensure that no new parts with the old structure
-    /// are added to the table and thus the set of parts to modify doesn't change.
-    mutable RWLock new_data_structure_lock = RWLockImpl::create();
-
-    /// Lock for the table column structure (names, types, etc.) and data path.
-    /// It is taken in exclusive mode by queries that modify them (e.g. RENAME, ALTER and DROP)
-    /// and in share mode by other queries.
-    mutable RWLock structure_lock = RWLockImpl::create();
+    /// Lock required for drop queries. Every thread that wants to ensure that
+    /// the table is not dropped has to take this lock for read (lockForShare).
+    /// DROP-like queries take this lock for write (lockExclusively), to be sure
+    /// that all table threads have finished.
+    mutable RWLock drop_lock = RWLockImpl::create();
 };

 }
--- a/src/Storages/Kafka/KafkaBlockInputStream.cpp
+++ b/src/Storages/Kafka/KafkaBlockInputStream.cpp
@ -19,15 +19,23 @@ namespace ErrorCodes
 const auto MAX_FAILED_POLL_ATTEMPTS = 10;

 KafkaBlockInputStream::KafkaBlockInputStream(
-    StorageKafka & storage_, const std::shared_ptr<Context> & context_, const Names & columns, Poco::Logger * log_, size_t max_block_size_, bool commit_in_suffix_)
+    StorageKafka & storage_,
+    const StorageMetadataPtr & metadata_snapshot_,
+    const std::shared_ptr<Context> & context_,
+    const Names & columns,
+    Poco::Logger * log_,
+    size_t max_block_size_,
+    bool commit_in_suffix_)
    : storage(storage_)
+    , metadata_snapshot(metadata_snapshot_)
    , context(context_)
    , column_names(columns)
    , log(log_)
    , max_block_size(max_block_size_)
    , commit_in_suffix(commit_in_suffix_)
-    , non_virtual_header(storage.getSampleBlockNonMaterialized())
-    , virtual_header(storage.getSampleBlockForColumns({"_topic", "_key", "_offset", "_partition", "_timestamp","_timestamp_ms","_headers.name","_headers.value"}))
+    , non_virtual_header(metadata_snapshot->getSampleBlockNonMaterialized())
+    , virtual_header(metadata_snapshot->getSampleBlockForColumns(
+            {"_topic", "_key", "_offset", "_partition", "_timestamp", "_timestamp_ms", "_headers.name", "_headers.value"}, storage.getVirtuals(), storage.getStorageID()))
 {
 }

@ -44,7 +52,7 @@ KafkaBlockInputStream::~KafkaBlockInputStream()

 Block KafkaBlockInputStream::getHeader() const
 {
-    return storage.getSampleBlockForColumns(column_names);
+    return metadata_snapshot->getSampleBlockForColumns(column_names, storage.getVirtuals(), storage.getStorageID());
 }

 void KafkaBlockInputStream::readPrefixImpl()
--- a/src/Storages/Kafka/KafkaBlockInputStream.h
+++ b/src/Storages/Kafka/KafkaBlockInputStream.h
@ -18,7 +18,13 @@ class KafkaBlockInputStream : public IBlockInputStream
 {
 public:
    KafkaBlockInputStream(
-        StorageKafka & storage_, const std::shared_ptr<Context> & context_, const Names & columns, Poco::Logger * log_, size_t max_block_size_, bool commit_in_suffix = true);
+        StorageKafka & storage_,
+        const StorageMetadataPtr & metadata_snapshot_,
+        const std::shared_ptr<Context> & context_,
+        const Names & columns,
+        Poco::Logger * log_,
+        size_t max_block_size_,
+        bool commit_in_suffix = true);
    ~KafkaBlockInputStream() override;

    String getName() const override { return storage.getName(); }
@ -33,6 +39,7 @@ public:

 private:
    StorageKafka & storage;
+    StorageMetadataPtr metadata_snapshot;
    const std::shared_ptr<Context> context;
    Names column_names;
    Poco::Logger * log;
--- a/src/Storages/Kafka/KafkaBlockOutputStream.cpp
+++ b/src/Storages/Kafka/KafkaBlockOutputStream.cpp
@ -11,13 +11,19 @@ namespace ErrorCodes
    extern const int CANNOT_CREATE_IO_BUFFER;
 }

-KafkaBlockOutputStream::KafkaBlockOutputStream(StorageKafka & storage_, const std::shared_ptr<Context> & context_) : storage(storage_), context(context_)
+KafkaBlockOutputStream::KafkaBlockOutputStream(
+    StorageKafka & storage_,
+    const StorageMetadataPtr & metadata_snapshot_,
+    const std::shared_ptr<Context> & context_)
+    : storage(storage_)
+    , metadata_snapshot(metadata_snapshot_)
+    , context(context_)
 {
 }

 Block KafkaBlockOutputStream::getHeader() const
 {
-    return storage.getSampleBlockNonMaterialized();
+    return metadata_snapshot->getSampleBlockNonMaterialized();
 }

 void KafkaBlockOutputStream::writePrefix()
--- a/src/Storages/Kafka/KafkaBlockOutputStream.h
+++ b/src/Storages/Kafka/KafkaBlockOutputStream.h
@ -10,7 +10,10 @@ namespace DB
 class KafkaBlockOutputStream : public IBlockOutputStream
 {
 public:
-    explicit KafkaBlockOutputStream(StorageKafka & storage_, const std::shared_ptr<Context> & context_);
+    explicit KafkaBlockOutputStream(
+        StorageKafka & storage_,
+        const StorageMetadataPtr & metadata_snapshot_,
+        const std::shared_ptr<Context> & context_);

    Block getHeader() const override;

@ -22,6 +25,7 @@ public:

 private:
    StorageKafka & storage;
+    StorageMetadataPtr metadata_snapshot;
    const std::shared_ptr<Context> context;
    ProducerBufferPtr buffer;
    BlockOutputStreamPtr child;
--- a/src/Storages/Kafka/StorageKafka.cpp
+++ b/src/Storages/Kafka/StorageKafka.cpp
@ -140,7 +140,9 @@ StorageKafka::StorageKafka(
    , intermediate_commit(kafka_settings->kafka_commit_every_batch.value)
    , settings_adjustments(createSettingsAdjustments())
 {
-    setColumns(columns_);
+    StorageInMemoryMetadata storage_metadata;
+    storage_metadata.setColumns(columns_);
+    setInMemoryMetadata(storage_metadata);
    task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); });
    task->deactivate();

@ -202,6 +204,7 @@ String StorageKafka::getDefaultClientId(const StorageID & table_id_)

 Pipes StorageKafka::read(
    const Names & column_names,
+    const StorageMetadataPtr & metadata_snapshot,
    const SelectQueryInfo & /* query_info */,
    const Context & context,
    QueryProcessingStage::Enum /* processed_stage */,
@ -224,7 +227,7 @@ Pipes StorageKafka::read(
        /// TODO: probably that leads to awful performance.
        /// FIXME: seems that doesn't help with extra reading and committing unprocessed messages.
        /// TODO: rewrite KafkaBlockInputStream to KafkaSource. Now it is used in other place.
-        pipes.emplace_back(std::make_shared<SourceFromInputStream>(std::make_shared<KafkaBlockInputStream>(*this, modified_context, column_names, log, 1)));
+        pipes.emplace_back(std::make_shared<SourceFromInputStream>(std::make_shared<KafkaBlockInputStream>(*this, metadata_snapshot, modified_context, column_names, log, 1)));
    }

    LOG_DEBUG(log, "Starting reading {} streams", pipes.size());
@ -232,14 +235,14 @@ Pipes StorageKafka::read(
 }


-BlockOutputStreamPtr StorageKafka::write(const ASTPtr &, const Context & context)
+BlockOutputStreamPtr StorageKafka::write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context & context)
 {
    auto modified_context = std::make_shared<Context>(context);
    modified_context->applySettingsChanges(settings_adjustments);

    if (topics.size() > 1)
        throw Exception("Can't write to Kafka table with multiple topics!", ErrorCodes::NOT_IMPLEMENTED);
-    return std::make_shared<KafkaBlockOutputStream>(*this, modified_context);
+    return std::make_shared<KafkaBlockOutputStream>(*this, metadata_snapshot, modified_context);
 }


@ -519,6 +522,7 @@ bool StorageKafka::streamToViews()
    auto table = DatabaseCatalog::instance().getTable(table_id, global_context);
    if (!table)
        throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::LOGICAL_ERROR);
+    auto metadata_snapshot = getInMemoryMetadataPtr();

    // Create an INSERT query for streaming data
    auto insert = std::make_shared<ASTInsertQuery>();
@ -537,8 +541,7 @@ bool StorageKafka::streamToViews()

    for (size_t i = 0; i < num_created_consumers; ++i)
    {
-        auto stream
-            = std::make_shared<KafkaBlockInputStream>(*this, kafka_context, block_io.out->getHeader().getNames(), log, block_size, false);
+        auto stream = std::make_shared<KafkaBlockInputStream>(*this, metadata_snapshot, kafka_context, block_io.out->getHeader().getNames(), log, block_size, false);
        streams.emplace_back(stream);

        // Limit read batch to maximum block size to allow DDL
--- a/src/Storages/Kafka/StorageKafka.h
+++ b/src/Storages/Kafka/StorageKafka.h
@ -39,6 +39,7 @@ public:

    Pipes read(
        const Names & column_names,
+        const StorageMetadataPtr & /*metadata_snapshot*/,
        const SelectQueryInfo & query_info,
        const Context & context,
        QueryProcessingStage::Enum processed_stage,
@ -47,6 +48,7 @@ public:

    BlockOutputStreamPtr write(
        const ASTPtr & query,
+        const StorageMetadataPtr & /*metadata_snapshot*/,
        const Context & context) override;

    void pushReadBuffer(ConsumerBufferPtr buf);
--- a/Show More
+++ b/Show More