Merge branch 'master' into create-table-with-clone-as

2024-09-19 16:20:50 +00:00 · 2024-09-05 17:38:07 +02:00 · 2024-09-05 17:38:07 +02:00 · d0c6d8f118
commit d0c6d8f118
parent 23f0701fc8 b6572e36b4
292 changed files with 6465 additions and 1467 deletions
--- a/README.md
+++ b/README.md
@ -62,6 +62,7 @@ Other upcoming meetups
 * [Oslo Meetup](https://www.meetup.com/open-source-real-time-data-warehouse-real-time-analytics/events/302938622) - October 31
 * [Ghent Meetup](https://www.meetup.com/clickhouse-belgium-user-group/events/303049405/) - November 19
 * [Dubai Meetup](https://www.meetup.com/clickhouse-dubai-meetup-group/events/303096989/) - November 21
+* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26



--- a/base/harmful/harmful.c
+++ b/base/harmful/harmful.c
@ -66,13 +66,11 @@ TRAP(gethostbyname)
 TRAP(gethostbyname2)
 TRAP(gethostent)
 TRAP(getlogin)
-TRAP(getmntent)
 TRAP(getnetbyaddr)
 TRAP(getnetbyname)
 TRAP(getnetent)
 TRAP(getnetgrent)
 TRAP(getnetgrent_r)
-TRAP(getopt)
 TRAP(getopt_long)
 TRAP(getopt_long_only)
 TRAP(getpass)
@ -133,7 +131,6 @@ TRAP(nrand48)
 TRAP(__ppc_get_timebase_freq)
 TRAP(ptsname)
 TRAP(putchar_unlocked)
-TRAP(putenv)
 TRAP(pututline)
 TRAP(pututxline)
 TRAP(putwchar_unlocked)
@ -148,7 +145,6 @@ TRAP(sethostent)
 TRAP(sethostid)
 TRAP(setkey)
 //TRAP(setlocale) // Used by replxx at startup
-TRAP(setlogmask)
 TRAP(setnetent)
 TRAP(setnetgrent)
 TRAP(setprotoent)
@ -203,7 +199,6 @@ TRAP(lgammal)
 TRAP(nftw)
 TRAP(nl_langinfo)
 TRAP(putc_unlocked)
-TRAP(rand)
 /** In  the current POSIX.1 specification (POSIX.1-2008), readdir() is not required to be thread-safe.  However, in modern
  * implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams
  * are thread-safe.  In cases where multiple threads must read from the same directory stream, using readdir() with external
@ -288,4 +283,14 @@ TRAP(tss_get)
 TRAP(tss_set)
 TRAP(tss_delete)

+#ifndef USE_MUSL
+/// These produce duplicate symbol errors when statically linking with musl.
+/// Maybe we can remove them from the musl fork.
+TRAP(getopt)
+TRAP(putenv)
+TRAP(setlogmask)
+TRAP(rand)
+TRAP(getmntent)
+#endif
+
 #endif
--- a/base/poco/Foundation/src/Path_UNIX.cpp
+++ b/base/poco/Foundation/src/Path_UNIX.cpp
@ -48,25 +48,17 @@ std::string PathImpl::currentImpl()
 std::string PathImpl::homeImpl()
 {
 	std::string path;
-#if defined(_POSIX_C_SOURCE) || defined(_BSD_SOURCE) || defined(_POSIX_C_SOURCE)
 	size_t buf_size = 1024;     // Same as glibc use for getpwuid
 	std::vector<char> buf(buf_size);
 	struct passwd res;
 	struct passwd* pwd = nullptr;

 	getpwuid_r(getuid(), &res, buf.data(), buf_size, &pwd);
-#else
-	struct passwd* pwd = getpwuid(getuid());
-#endif
 	if (pwd)
 		path = pwd->pw_dir;
 	else
 	{
-#if defined(_POSIX_C_SOURCE) || defined(_BSD_SOURCE) || defined(_POSIX_C_SOURCE)
 		getpwuid_r(getuid(), &res, buf.data(), buf_size, &pwd);
-#else
-		pwd = getpwuid(geteuid());
-#endif
 		if (pwd)
 			path = pwd->pw_dir;
 		else
@ -82,7 +74,7 @@ std::string PathImpl::configHomeImpl()
 {
 	std::string path = PathImpl::homeImpl();
 	std::string::size_type n = path.size();
-	if (n > 0 && path[n - 1] == '/') 
+	if (n > 0 && path[n - 1] == '/')
 #if POCO_OS == POCO_OS_MAC_OS_X
 	  path.append("Library/Preferences/");
 #else
@ -97,7 +89,7 @@ std::string PathImpl::dataHomeImpl()
 {
 	std::string path = PathImpl::homeImpl();
 	std::string::size_type n = path.size();
-	if (n > 0 && path[n - 1] == '/') 
+	if (n > 0 && path[n - 1] == '/')
 #if POCO_OS == POCO_OS_MAC_OS_X
 	  path.append("Library/Application Support/");
 #else
@ -112,7 +104,7 @@ std::string PathImpl::cacheHomeImpl()
 {
 	std::string path = PathImpl::homeImpl();
 	std::string::size_type n = path.size();
-	if (n > 0 && path[n - 1] == '/') 
+	if (n > 0 && path[n - 1] == '/')
 #if POCO_OS == POCO_OS_MAC_OS_X
 	  path.append("Library/Caches/");
 #else
@ -127,7 +119,7 @@ std::string PathImpl::tempHomeImpl()
 {
 	std::string path = PathImpl::homeImpl();
 	std::string::size_type n = path.size();
-	if (n > 0 && path[n - 1] == '/') 
+	if (n > 0 && path[n - 1] == '/')
 #if POCO_OS == POCO_OS_MAC_OS_X
 	  path.append("Library/Caches/");
 #else
@ -159,7 +151,7 @@ std::string PathImpl::tempImpl()
 std::string PathImpl::configImpl()
 {
 	std::string path;
-	
+
 #if POCO_OS == POCO_OS_MAC_OS_X
 	  path = "/Library/Preferences/";
 #else
--- a/base/poco/Net/include/Poco/Net/HTTPSession.h
+++ b/base/poco/Net/include/Poco/Net/HTTPSession.h
@ -19,6 +19,8 @@


 #include <ios>
+#include <memory>
+#include <functional>
 #include "Poco/Any.h"
 #include "Poco/Buffer.h"
 #include "Poco/Exception.h"
@ -33,6 +35,27 @@ namespace Net
 {


+    class IHTTPSessionDataHooks
+    /// Interface to control the stream of data bytes being sent or received through the socket by HTTPSession.
+    /// It allows monitoring, throttling and scheduling of data streams with syscall granularity.
+    {
+    public:
+        virtual ~IHTTPSessionDataHooks() = default;
+
+        virtual void atStart(int bytes) = 0;
+        /// Called before sending/receiving data to/from the socket; `bytes` is the requested length.
+
+        virtual void atFinish(int bytes) = 0;
+        /// Called when sending/receiving has finished successfully; `bytes` is the value returned by the socket call.
+
+        virtual void atFail() = 0;
+        /// If an error occurred during send/receive, `atFail()` is called instead of `atFinish()`.
+    };
+
+
+    using HTTPSessionDataHooksPtr = std::shared_ptr<IHTTPSessionDataHooks>;
+
+
    class Net_API HTTPSession
    /// HTTPSession implements basic HTTP session management
    /// for both HTTP clients and HTTP servers.
@ -73,6 +96,12 @@ namespace Net
        Poco::Timespan getReceiveTimeout() const;
        /// Returns receive timeout for the HTTP session.

+        void setSendDataHooks(const HTTPSessionDataHooksPtr & sendDataHooks = {});
+        /// Sets data hooks that will be called on every send to the socket.
+
+        void setReceiveDataHooks(const HTTPSessionDataHooksPtr & receiveDataHooks = {});
+        /// Sets data hooks that will be called on every receive from the socket.
+
        bool connected() const;
        /// Returns true if the underlying socket is connected.

@ -211,6 +240,10 @@ namespace Net
        Poco::Exception * _pException;
        Poco::Any _data;

+        // Data hooks
+        HTTPSessionDataHooksPtr _sendDataHooks;
+        HTTPSessionDataHooksPtr _receiveDataHooks;
+
        friend class HTTPStreamBuf;
        friend class HTTPHeaderStreamBuf;
        friend class HTTPFixedLengthStreamBuf;
@ -246,6 +279,16 @@ namespace Net
        return _receiveTimeout;
    }

+    inline void HTTPSession::setSendDataHooks(const HTTPSessionDataHooksPtr & sendDataHooks)
+    {
+        _sendDataHooks = sendDataHooks; // replaces any previously set hooks; an empty pointer disables them (write() checks for null)
+    }
+
+    inline void HTTPSession::setReceiveDataHooks(const HTTPSessionDataHooksPtr & receiveDataHooks)
+    {
+        _receiveDataHooks = receiveDataHooks; // replaces any previously set hooks; an empty pointer disables them (receive() checks for null)
+    }
+
    inline StreamSocket & HTTPSession::socket()
    {
        return _socket;
--- a/base/poco/Net/src/HTTPSession.cpp
+++ b/base/poco/Net/src/HTTPSession.cpp
@ -128,14 +128,14 @@ int HTTPSession::get()
 {
 	if (_pCurrent == _pEnd)
 		refill();
-	
+
 	if (_pCurrent < _pEnd)
 		return *_pCurrent++;
 	else
 		return std::char_traits<char>::eof();
 }

-	
+
 int HTTPSession::peek()
 {
 	if (_pCurrent == _pEnd)
@ -147,7 +147,7 @@ int HTTPSession::peek()
 		return std::char_traits<char>::eof();
 }

-	
+
 int HTTPSession::read(char* buffer, std::streamsize length)
 {
 	if (_pCurrent < _pEnd)
@ -166,10 +166,17 @@ int HTTPSession::write(const char* buffer, std::streamsize length)
 {
 	try
 	{
-		return _socket.sendBytes(buffer, (int) length);
+		if (_sendDataHooks)
+			_sendDataHooks->atStart((int) length);
+		int result = _socket.sendBytes(buffer, (int) length);
+		if (_sendDataHooks)
+			_sendDataHooks->atFinish(result);
+		return result;
 	}
 	catch (Poco::Exception& exc)
 	{
+		if (_sendDataHooks)
+			_sendDataHooks->atFail();
 		setException(exc);
 		throw;
 	}
@ -180,10 +187,17 @@ int HTTPSession::receive(char* buffer, int length)
 {
 	try
 	{
-		return _socket.receiveBytes(buffer, length);
+		if (_receiveDataHooks)
+			_receiveDataHooks->atStart(length);
+		int result = _socket.receiveBytes(buffer, length);
+		if (_receiveDataHooks)
+			_receiveDataHooks->atFinish(result);
+		return result;
 	}
 	catch (Poco::Exception& exc)
 	{
+		if (_receiveDataHooks)
+			_receiveDataHooks->atFail();
 		setException(exc);
 		throw;
 	}
--- a/base/poco/Net/src/SocketImpl.cpp
+++ b/base/poco/Net/src/SocketImpl.cpp
@ -63,7 +63,7 @@ bool checkIsBrokenTimeout()

 SocketImpl::SocketImpl():
 	_sockfd(POCO_INVALID_SOCKET),
-	_blocking(true), 
+	_blocking(true),
 	_isBrokenTimeout(checkIsBrokenTimeout())
 {
 }
@ -82,7 +82,7 @@ SocketImpl::~SocketImpl()
 	close();
 }

-	
+
 SocketImpl* SocketImpl::acceptConnection(SocketAddress& clientAddr)
 {
 	if (_sockfd == POCO_INVALID_SOCKET) throw InvalidSocketException();
@ -118,7 +118,7 @@ void SocketImpl::connect(const SocketAddress& address)
 		rc = ::connect(_sockfd, address.addr(), address.length());
 	}
 	while (rc != 0 && lastError() == POCO_EINTR);
-	if (rc != 0) 
+	if (rc != 0)
 	{
 		int err = lastError();
 		error(err, address.toString());
@ -205,7 +205,7 @@ void SocketImpl::bind6(const SocketAddress& address, bool reuseAddress, bool reu
 #if defined(POCO_HAVE_IPv6)
 	if (address.family() != SocketAddress::IPv6)
 		throw Poco::InvalidArgumentException("SocketAddress must be an IPv6 address");
-		
+
 	if (_sockfd == POCO_INVALID_SOCKET)
 	{
 		init(address.af());
@ -226,11 +226,11 @@ void SocketImpl::bind6(const SocketAddress& address, bool reuseAddress, bool reu
 #endif
 }

-	
+
 void SocketImpl::listen(int backlog)
 {
 	if (_sockfd == POCO_INVALID_SOCKET) throw InvalidSocketException();
-	
+
 	int rc = ::listen(_sockfd, backlog);
 	if (rc != 0) error();
 }
@ -254,7 +254,7 @@ void SocketImpl::shutdownReceive()
 	if (rc != 0) error();
 }

-	
+
 void SocketImpl::shutdownSend()
 {
 	if (_sockfd == POCO_INVALID_SOCKET) throw InvalidSocketException();
@ -263,7 +263,7 @@ void SocketImpl::shutdownSend()
 	if (rc != 0) error();
 }

-	
+
 void SocketImpl::shutdown()
 {
 	if (_sockfd == POCO_INVALID_SOCKET) throw InvalidSocketException();
@ -318,7 +318,7 @@ int SocketImpl::receiveBytes(void* buffer, int length, int flags)
 				throw TimeoutException();
 		}
 	}
-	
+
 	int rc;
 	do
 	{
@ -326,7 +326,7 @@ int SocketImpl::receiveBytes(void* buffer, int length, int flags)
 		rc = ::recv(_sockfd, reinterpret_cast<char*>(buffer), length, flags);
 	}
 	while (blocking && rc < 0 && lastError() == POCO_EINTR);
-	if (rc < 0) 
+	if (rc < 0)
 	{
 		int err = lastError();
 		if ((err == POCO_EAGAIN || err == POCO_EWOULDBLOCK) && !blocking)
@ -364,7 +364,7 @@ int SocketImpl::receiveFrom(void* buffer, int length, SocketAddress& address, in
 				throw TimeoutException();
 		}
 	}
-	
+
 	sockaddr_storage abuffer;
 	struct sockaddr* pSA = reinterpret_cast<struct sockaddr*>(&abuffer);
 	poco_socklen_t saLen = sizeof(abuffer);
@ -451,7 +451,7 @@ bool SocketImpl::pollImpl(Poco::Timespan& remainingTime, int mode)
 	}
 	while (rc < 0 && lastError() == POCO_EINTR);
 	if (rc < 0) error();
-	return rc > 0; 
+	return rc > 0;

 #else

@ -494,7 +494,7 @@ bool SocketImpl::pollImpl(Poco::Timespan& remainingTime, int mode)
 	}
 	while (rc < 0 && errorCode == POCO_EINTR);
 	if (rc < 0) error(errorCode);
-	return rc > 0; 
+	return rc > 0;

 #endif // POCO_HAVE_FD_POLL
 }
@ -504,13 +504,13 @@ bool SocketImpl::poll(const Poco::Timespan& timeout, int mode)
 	Poco::Timespan remainingTime(timeout);
 	return pollImpl(remainingTime, mode);
 }
-	
+
 void SocketImpl::setSendBufferSize(int size)
 {
 	setOption(SOL_SOCKET, SO_SNDBUF, size);
 }

-	
+
 int SocketImpl::getSendBufferSize()
 {
 	int result;
@ -524,7 +524,7 @@ void SocketImpl::setReceiveBufferSize(int size)
 	setOption(SOL_SOCKET, SO_RCVBUF, size);
 }

-	
+
 int SocketImpl::getReceiveBufferSize()
 {
 	int result;
@ -570,7 +570,7 @@ Poco::Timespan SocketImpl::getReceiveTimeout()
 	return result;
 }

-	
+
 SocketAddress SocketImpl::address()
 {
 	if (_sockfd == POCO_INVALID_SOCKET) throw InvalidSocketException();
@ -581,7 +581,7 @@ SocketAddress SocketImpl::address()
 	int rc = ::getsockname(_sockfd, pSA, &saLen);
 	if (rc == 0)
 		return SocketAddress(pSA, saLen);
-	else 
+	else
 		error();
 	return SocketAddress();
 }
--- a/base/poco/Util/CMakeLists.txt
+++ b/base/poco/Util/CMakeLists.txt
@ -18,4 +18,4 @@ target_compile_options (_poco_util
        -Wno-zero-as-null-pointer-constant
 )
 target_include_directories (_poco_util SYSTEM PUBLIC "include")
-target_link_libraries (_poco_util PUBLIC Poco::JSON Poco::XML)
+target_link_libraries (_poco_util PUBLIC Poco::JSON Poco::XML Poco::Net)
--- a/base/poco/Util/include/Poco/Util/AbstractConfiguration.h
+++ b/base/poco/Util/include/Poco/Util/AbstractConfiguration.h
@ -241,6 +241,20 @@ namespace Util
        /// If the value contains references to other properties (${<property>}), these
        /// are expanded.

+        std::string getHost(const std::string & key) const;
+        /// Returns the string value of the host property with the given name.
+        /// Throws a NotFoundException if the key does not exist.
+        /// Throws a SyntaxException if the property is not a valid host (IP address or domain).
+        /// If the value contains references to other properties (${<property>}), these
+        /// are expanded.
+
+        std::string getHost(const std::string & key, const std::string & defaultValue) const;
+        /// If a property with the given key exists, returns the host property's string value,
+        /// otherwise returns the given default value.
+        /// Throws a SyntaxException if the property is not a valid host (IP address or domain).
+        /// If the value contains references to other properties (${<property>}), these
+        /// are expanded.
+
        virtual void setString(const std::string & key, const std::string & value);
        /// Sets the property with the given key to the given value.
        /// An already existing value for the key is overwritten.
@ -339,12 +353,35 @@ namespace Util
        static bool parseBool(const std::string & value);
        void setRawWithEvent(const std::string & key, std::string value);

+        static void checkHostValidity(const std::string & value);
+        /// Throws a SyntaxException if the value is not a valid host (IP address or domain).
+
        virtual ~AbstractConfiguration();

    private:
        std::string internalExpand(const std::string & value) const;
        std::string uncheckedExpand(const std::string & value) const;

+        static bool isValidIPv4Address(const std::string & value);
+        /// IPv4 address considered valid if it is "0.0.0.0" or one of those,
+        /// defined by inet_aton() or inet_addr()
+
+        static bool isValidIPv6Address(const std::string & value);
+        /// IPv6 address considered valid if it is "::" or one of those,
+        /// defined by inet_pton() with AF_INET6 flag
+        /// (in this case it may have scope id and may be surrounded by '[', ']')
+
+        static bool isValidDomainName(const std::string & value);
+        /// <domain> ::= <subdomain> [ "." ]
+        /// <subdomain> ::= <label> | <subdomain> "." <label>
+        /// <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
+        /// <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
+        /// <let-dig-hyp> ::= <let-dig> | "-"
+        /// <let-dig> ::= <letter> | <digit>
+        /// <letter> ::= any one of the 52 alphabetic characters A through Z in
+        /// upper case and a through z in lower case
+        /// <digit> ::= any one of the ten digits 0 through 9
+
        AbstractConfiguration(const AbstractConfiguration &);
        AbstractConfiguration & operator=(const AbstractConfiguration &);

--- a/base/poco/Util/src/AbstractConfiguration.cpp
+++ b/base/poco/Util/src/AbstractConfiguration.cpp
@ -18,6 +18,7 @@
 #include "Poco/NumberParser.h"
 #include "Poco/NumberFormatter.h"
 #include "Poco/String.h"
+#include "Poco/Net/IPAddressImpl.h"


 using Poco::Mutex;
@ -263,6 +264,41 @@ bool AbstractConfiguration::getBool(const std::string& key, bool defaultValue) c
 }


+std::string AbstractConfiguration::getHost(const std::string& key) const
+{
+	// Hold the configuration mutex for the whole lookup and expansion.
+	Mutex::ScopedLock lock(_mutex);
+
+	std::string raw;
+	if (!getRaw(key, raw))
+		throw NotFoundException(key);
+
+	// Expand ${...} references first, then validate the resulting text as a host.
+	std::string host = internalExpand(raw);
+	checkHostValidity(host);
+	return host;
+}
+
+
+std::string AbstractConfiguration::getHost(const std::string& key, const std::string& defaultValue) const
+{
+	Mutex::ScopedLock lock(_mutex);
+
+	std::string raw;
+	if (!getRaw(key, raw))
+	{
+		// Key is absent: the fallback is validated as well, but returned without expansion.
+		checkHostValidity(defaultValue);
+		return defaultValue;
+	}
+
+	// Expand ${...} references first, then validate the resulting text as a host.
+	std::string host = internalExpand(raw);
+	checkHostValidity(host);
+	return host;
+}
+
+
 void AbstractConfiguration::setString(const std::string& key, const std::string& value)
 {
 	setRawWithEvent(key, value);
@ -529,4 +565,68 @@ void AbstractConfiguration::setRawWithEvent(const std::string& key, std::string
 }


+void AbstractConfiguration::checkHostValidity(const std::string& value)
+{
+	// Accepted forms, tried in this order: IPv4 literal, IPv6 literal, domain name.
+	const bool isHost = isValidIPv4Address(value) || isValidIPv6Address(value) || isValidDomainName(value);
+	if (!isHost)
+		throw SyntaxException("Property is not a valid host name", value);
+}
+
+
+bool AbstractConfiguration::isValidIPv4Address(const std::string& value)
+{
+	using Poco::Net::Impl::IPv4AddressImpl;
+	IPv4AddressImpl empty4 = IPv4AddressImpl(); // default-constructed address, used as the "nothing parsed" marker
+
+	IPv4AddressImpl ipAddress = IPv4AddressImpl::parse(value); // NOTE(review): presumably yields a default-constructed address on failure -- confirm
+	return ipAddress != empty4 || value == "0.0.0.0"; // "0.0.0.0" is accepted explicitly; presumably it parses to the same value as the marker
+}
+
+
+bool AbstractConfiguration::isValidIPv6Address(const std::string& value)
+{
+#if defined(POCO_HAVE_IPv6)
+	using Poco::Net::Impl::IPv6AddressImpl;
+	IPv6AddressImpl empty6 = IPv6AddressImpl(); // default-constructed address, used as the "nothing parsed" marker
+
+	IPv6AddressImpl ipAddress = IPv6AddressImpl::parse(value); // NOTE(review): presumably yields a default-constructed address on failure -- confirm
+	return ipAddress != empty6 || value == "::"; // "::" is accepted explicitly; presumably it parses to the same value as the marker
+#else
+	return false; // POCO_HAVE_IPv6 is not defined: no string is treated as an IPv6 address
+#endif
+}
+
+
+bool AbstractConfiguration::isValidDomainName(const std::string& value)
+{
+	if (value.empty() || value == "." || value.length() > 253) // reject empty, root-only and over-long names
+		return false;
+	int labelLength = 0; // characters seen so far in the current dot-separated label
+	char oldChar = 0; // previous character (0 before the first one)
+	// ASCII ranges instead of isalnum()/isdigit(): locale-independent, and no UB for negative char values.
+	for (char ch : value)
+	{
+		const bool isLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
+		const bool isDigit = ch >= '0' && ch <= '9';
+		if (ch == '.')
+		{
+			if (labelLength == 0 || labelLength > 63 || oldChar == '-') // no empty or over-long label, none ending in '-'
+				return false;
+			labelLength = 0;
+		}
+		else if (isLetter || isDigit || ch == '-')
+		{
+			if (labelLength == 0 && !isLetter) // a label must start with a letter
+				return false;
+			++labelLength;
+		}
+		else
+			return false; // any other character is not allowed in a host name
+		oldChar = ch;
+	}
+	return oldChar == '.' || (labelLength > 0 && labelLength <= 63 && oldChar != '-'); // one trailing '.' is allowed
+}
+
+
 } } // namespace Poco::Util
--- a/contrib/libfiu
+++ b/contrib/libfiu
@ -1 +1 @@
-Subproject commit b85edbde4cf974b1b40d27828a56f0505f4e2ee5
+Subproject commit a1290d8cd3d7b4541d6c976e0a54f572ac03f2a3
--- a/contrib/llvm-project-cmake/CMakeLists.txt
+++ b/contrib/llvm-project-cmake/CMakeLists.txt
@ -140,6 +140,12 @@ if (CMAKE_CROSSCOMPILING)
    message (STATUS "CROSS COMPILING SET LLVM HOST TRIPLE ${LLVM_HOST_TRIPLE}")
 endif()

+# llvm-project/llvm/cmake/config-ix.cmake does a weird thing: it defines _LARGEFILE64_SOURCE,
+# then checks if lseek64() function exists, then undefines _LARGEFILE64_SOURCE.
+# Then the actual code that uses this function *doesn't* define _LARGEFILE64_SOURCE, so lseek64()
+# may not exist and compilation fails. This happens with musl.
+add_compile_definitions("_LARGEFILE64_SOURCE")
+
 add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}")

 set_directory_properties (PROPERTIES
--- a/contrib/sysroot
+++ b/contrib/sysroot
@ -1 +1 @@
-Subproject commit cc385041b226d1fc28ead14dbab5d40a5f821dd8
+Subproject commit 5be834147d5b5dd77ca2b821f356982029320513
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="24.8.2.3"
+ARG VERSION="24.8.3.59"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""

--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="24.8.2.3"
+ARG VERSION="24.8.3.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""

--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="24.8.2.3"
+ARG VERSION="24.8.3.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

 #docker-official-library:off
--- a/docker/test/performance-comparison/run.sh
+++ b/docker/test/performance-comparison/run.sh
@ -13,7 +13,8 @@ entry="/usr/share/clickhouse-test/performance/scripts/entrypoint.sh"
 # https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt
 # Double-escaped backslashes are a tribute to the engineering wonder of docker --
 # it gives '/bin/sh: 1: [bash,: not found' otherwise.
-numactl --hardware
+echo > compare.log
+numactl --hardware | tee -a compare.log
 node=$(( RANDOM % $(numactl --hardware | sed -n 's/^.*available:\(.*\)nodes.*$/\1/p') ));
-echo Will bind to NUMA node $node;
+echo Will bind to NUMA node $node | tee -a compare.log
 numactl --cpunodebind=$node --membind=$node $entry
--- a/docs/changelogs/v24.3.10.33-lts.md
+++ b/docs/changelogs/v24.3.10.33-lts.md
@ -0,0 +1,32 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.3.10.33-lts (37b6502ebf0) FIXME as compared to v24.3.9.5-lts (a939270465e)
+
+#### Improvement
+* Backported in [#68870](https://github.com/ClickHouse/ClickHouse/issues/68870): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Backported in [#69095](https://github.com/ClickHouse/ClickHouse/issues/69095): Support for the Spanish language in the embedded dictionaries. [#69035](https://github.com/ClickHouse/ClickHouse/pull/69035) ([Vasily Okunev](https://github.com/VOkunev)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#68995](https://github.com/ClickHouse/ClickHouse/issues/68995): Fix the upper bound of the function `fromModifiedJulianDay`. It was supposed to be `9999-12-31` but was mistakenly set to `9999-01-01`. [#67583](https://github.com/ClickHouse/ClickHouse/pull/67583) ([PHO](https://github.com/depressed-pho)).
+* Backported in [#68844](https://github.com/ClickHouse/ClickHouse/issues/68844): Fixed crash in Parquet filtering when data types in the file substantially differ from requested types (e.g. `... FROM file('a.parquet', Parquet, 'x String')`, but the file has `x Int64`). Without this fix, use `input_format_parquet_filter_push_down = 0` as a workaround. [#68131](https://github.com/ClickHouse/ClickHouse/pull/68131) ([Michael Kolupaev](https://github.com/al13n321)).
+* Backported in [#68881](https://github.com/ClickHouse/ClickHouse/issues/68881): Fixes [#50868](https://github.com/ClickHouse/ClickHouse/issues/50868). Small DateTime64 constant values returned by a nested subquery inside a distributed query were wrongly transformed to Nulls, thus causing errors and possible incorrect query results. [#68323](https://github.com/ClickHouse/ClickHouse/pull/68323) ([Shankar](https://github.com/shiyer7474)).
+* Backported in [#69054](https://github.com/ClickHouse/ClickHouse/issues/69054): Added back virtual columns `_table` and `_database` to distributed tables. They were available until version 24.3. [#68672](https://github.com/ClickHouse/ClickHouse/pull/68672) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#68856](https://github.com/ClickHouse/ClickHouse/issues/68856): Fix possible error `Size of permutation (0) is less than required (...)` during Variant column permutation. [#68681](https://github.com/ClickHouse/ClickHouse/pull/68681) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#69152](https://github.com/ClickHouse/ClickHouse/issues/69152): Fix possible wrong result during anyHeavy state merge. [#68950](https://github.com/ClickHouse/ClickHouse/pull/68950) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#69112](https://github.com/ClickHouse/ClickHouse/issues/69112): Fix logical error when we have empty async insert. [#69080](https://github.com/ClickHouse/ClickHouse/pull/69080) ([Han Fei](https://github.com/hanfei1991)).
+
+#### NO CL CATEGORY
+
+* Backported in [#68938](https://github.com/ClickHouse/ClickHouse/issues/68938):. [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Backported in [#68826](https://github.com/ClickHouse/ClickHouse/issues/68826): Turn off fault injection for insert in `01396_inactive_replica_cleanup_nodes_zookeeper`. [#68715](https://github.com/ClickHouse/ClickHouse/pull/68715) ([alesapin](https://github.com/alesapin)).
+* Backported in [#68754](https://github.com/ClickHouse/ClickHouse/issues/68754): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)).
+* Backported in [#69044](https://github.com/ClickHouse/ClickHouse/issues/69044): Fix 01114_database_atomic flakiness. [#68930](https://github.com/ClickHouse/ClickHouse/pull/68930) ([Raúl Marín](https://github.com/Algunenano)).
+
--- a/docs/changelogs/v24.5.7.31-stable.md
+++ b/docs/changelogs/v24.5.7.31-stable.md
@ -0,0 +1,29 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.5.7.31-stable (6c185e9aec1) FIXME as compared to v24.5.6.45-stable (bdca8604c29)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#68564](https://github.com/ClickHouse/ClickHouse/issues/68564): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#68996](https://github.com/ClickHouse/ClickHouse/issues/68996): Fix the upper bound of the function `fromModifiedJulianDay`. It was supposed to be `9999-12-31` but was mistakenly set to `9999-01-01`. [#67583](https://github.com/ClickHouse/ClickHouse/pull/67583) ([PHO](https://github.com/depressed-pho)).
+* Backported in [#68865](https://github.com/ClickHouse/ClickHouse/issues/68865): Fixed crash in Parquet filtering when data types in the file substantially differ from requested types (e.g. `... FROM file('a.parquet', Parquet, 'x String')`, but the file has `x Int64`). Without this fix, use `input_format_parquet_filter_push_down = 0` as a workaround. [#68131](https://github.com/ClickHouse/ClickHouse/pull/68131) ([Michael Kolupaev](https://github.com/al13n321)).
+* Backported in [#69004](https://github.com/ClickHouse/ClickHouse/issues/69004): After https://github.com/ClickHouse/ClickHouse/pull/61984 `schema_inference_make_columns_nullable=0` still can make columns `Nullable` in Parquet/Arrow formats. The change was backward incompatible and users noticed the changes in the behaviour. This PR makes `schema_inference_make_columns_nullable=0` to work as before (no Nullable columns will be inferred) and introduces new value `auto` for this setting that will make columns `Nullable` only if data has information about nullability. [#68298](https://github.com/ClickHouse/ClickHouse/pull/68298) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68882](https://github.com/ClickHouse/ClickHouse/issues/68882): Fixes [#50868](https://github.com/ClickHouse/ClickHouse/issues/50868). Small DateTime64 constant values returned by a nested subquery inside a distributed query were wrongly transformed to Nulls, thus causing errors and possible incorrect query results. [#68323](https://github.com/ClickHouse/ClickHouse/pull/68323) ([Shankar](https://github.com/shiyer7474)).
+* Backported in [#69023](https://github.com/ClickHouse/ClickHouse/issues/69023): Added back virtual columns `_table` and `_database` to distributed tables. They were available until version 24.3. [#68672](https://github.com/ClickHouse/ClickHouse/pull/68672) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#68858](https://github.com/ClickHouse/ClickHouse/issues/68858): Fix possible error `Size of permutation (0) is less than required (...)` during Variant column permutation. [#68681](https://github.com/ClickHouse/ClickHouse/pull/68681) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68784](https://github.com/ClickHouse/ClickHouse/issues/68784): Fix issue with materialized constant keys when hashing maps with arrays as keys in functions `sipHash(64/128)Keyed`. [#68731](https://github.com/ClickHouse/ClickHouse/pull/68731) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Backported in [#69154](https://github.com/ClickHouse/ClickHouse/issues/69154): Fix possible wrong result during anyHeavy state merge. [#68950](https://github.com/ClickHouse/ClickHouse/pull/68950) ([Raúl Marín](https://github.com/Algunenano)).
+
+#### NO CL CATEGORY
+
+* Backported in [#68940](https://github.com/ClickHouse/ClickHouse/issues/68940):. [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Backported in [#68828](https://github.com/ClickHouse/ClickHouse/issues/68828): Turn off fault injection for insert in `01396_inactive_replica_cleanup_nodes_zookeeper`. [#68715](https://github.com/ClickHouse/ClickHouse/pull/68715) ([alesapin](https://github.com/alesapin)).
+* Backported in [#69046](https://github.com/ClickHouse/ClickHouse/issues/69046): Fix 01114_database_atomic flakiness. [#68930](https://github.com/ClickHouse/ClickHouse/pull/68930) ([Raúl Marín](https://github.com/Algunenano)).
+
--- a/docs/changelogs/v24.6.5.30-stable.md
+++ b/docs/changelogs/v24.6.5.30-stable.md
@ -0,0 +1,29 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.6.5.30-stable (e6e196c92d6) FIXME as compared to v24.6.4.42-stable (c534bb4b4dd)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#68969](https://github.com/ClickHouse/ClickHouse/issues/68969): Fix the upper bound of the function `fromModifiedJulianDay`. It was supposed to be `9999-12-31` but was mistakenly set to `9999-01-01`. [#67583](https://github.com/ClickHouse/ClickHouse/pull/67583) ([PHO](https://github.com/depressed-pho)).
+* Backported in [#68814](https://github.com/ClickHouse/ClickHouse/issues/68814): Fixed crash in Parquet filtering when data types in the file substantially differ from requested types (e.g. `... FROM file('a.parquet', Parquet, 'x String')`, but the file has `x Int64`). Without this fix, use `input_format_parquet_filter_push_down = 0` as a workaround. [#68131](https://github.com/ClickHouse/ClickHouse/pull/68131) ([Michael Kolupaev](https://github.com/al13n321)).
+* Backported in [#69005](https://github.com/ClickHouse/ClickHouse/issues/69005): After https://github.com/ClickHouse/ClickHouse/pull/61984 `schema_inference_make_columns_nullable=0` can still make columns `Nullable` in Parquet/Arrow formats. The change was backward incompatible and users noticed the changes in the behaviour. This PR makes `schema_inference_make_columns_nullable=0` work as before (no Nullable columns will be inferred) and introduces a new value `auto` for this setting that will make columns `Nullable` only if data has information about nullability. [#68298](https://github.com/ClickHouse/ClickHouse/pull/68298) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68883](https://github.com/ClickHouse/ClickHouse/issues/68883): Fixes [#50868](https://github.com/ClickHouse/ClickHouse/issues/50868). Small DateTime64 constant values returned by a nested subquery inside a distributed query were wrongly transformed to Nulls, thus causing errors and possible incorrect query results. [#68323](https://github.com/ClickHouse/ClickHouse/pull/68323) ([Shankar](https://github.com/shiyer7474)).
+* Backported in [#69025](https://github.com/ClickHouse/ClickHouse/issues/69025): Added back virtual columns `_table` and `_database` to distributed tables. They were available until version 24.3. [#68672](https://github.com/ClickHouse/ClickHouse/pull/68672) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#68860](https://github.com/ClickHouse/ClickHouse/issues/68860): Fix possible error `Size of permutation (0) is less than required (...)` during Variant column permutation. [#68681](https://github.com/ClickHouse/ClickHouse/pull/68681) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68786](https://github.com/ClickHouse/ClickHouse/issues/68786): Fix issue with materialized constant keys when hashing maps with arrays as keys in functions `sipHash(64/128)Keyed`. [#68731](https://github.com/ClickHouse/ClickHouse/pull/68731) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Backported in [#69156](https://github.com/ClickHouse/ClickHouse/issues/69156): Fix possible wrong result during anyHeavy state merge. [#68950](https://github.com/ClickHouse/ClickHouse/pull/68950) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#69116](https://github.com/ClickHouse/ClickHouse/issues/69116): Fix logical error when we have empty async insert. [#69080](https://github.com/ClickHouse/ClickHouse/pull/69080) ([Han Fei](https://github.com/hanfei1991)).
+
+#### NO CL CATEGORY
+
+* Backported in [#68942](https://github.com/ClickHouse/ClickHouse/issues/68942):. [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Backported in [#68830](https://github.com/ClickHouse/ClickHouse/issues/68830): Turn off fault injection for insert in `01396_inactive_replica_cleanup_nodes_zookeeper`. [#68715](https://github.com/ClickHouse/ClickHouse/pull/68715) ([alesapin](https://github.com/alesapin)).
+* Backported in [#69048](https://github.com/ClickHouse/ClickHouse/issues/69048): Fix 01114_database_atomic flakiness. [#68930](https://github.com/ClickHouse/ClickHouse/pull/68930) ([Raúl Marín](https://github.com/Algunenano)).
+
--- a/docs/changelogs/v24.8.3.59-lts.md
+++ b/docs/changelogs/v24.8.3.59-lts.md
@ -0,0 +1,50 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.8.3.59-lts (e729b9fa40e) FIXME as compared to v24.8.2.3-lts (b54f79ed323)
+
+#### New Feature
+* Backported in [#68710](https://github.com/ClickHouse/ClickHouse/issues/68710): Query cache entries can now be dropped by tag. For example, the query cache entry created by `SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'` can now be dropped by `SYSTEM DROP QUERY CACHE TAG 'abc'` (or of course just: `SYSTEM DROP QUERY CACHE` which will clear the entire query cache). [#68477](https://github.com/ClickHouse/ClickHouse/pull/68477) ([Michał Tabaszewski](https://github.com/pinsvin00)).
+
+#### Improvement
+* Backported in [#69097](https://github.com/ClickHouse/ClickHouse/issues/69097): Support for the Spanish language in the embedded dictionaries. [#69035](https://github.com/ClickHouse/ClickHouse/pull/69035) ([Vasily Okunev](https://github.com/VOkunev)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#68973](https://github.com/ClickHouse/ClickHouse/issues/68973): Fix the upper bound of the function `fromModifiedJulianDay`. It was supposed to be `9999-12-31` but was mistakenly set to `9999-01-01`. [#67583](https://github.com/ClickHouse/ClickHouse/pull/67583) ([PHO](https://github.com/depressed-pho)).
+* Backported in [#68818](https://github.com/ClickHouse/ClickHouse/issues/68818): Fixed crash in Parquet filtering when data types in the file substantially differ from requested types (e.g. `... FROM file('a.parquet', Parquet, 'x String')`, but the file has `x Int64`). Without this fix, use `input_format_parquet_filter_push_down = 0` as a workaround. [#68131](https://github.com/ClickHouse/ClickHouse/pull/68131) ([Michael Kolupaev](https://github.com/al13n321)).
+* Backported in [#68893](https://github.com/ClickHouse/ClickHouse/issues/68893): After https://github.com/ClickHouse/ClickHouse/pull/61984 `schema_inference_make_columns_nullable=0` can still make columns `Nullable` in Parquet/Arrow formats. The change was backward incompatible and users noticed the changes in the behaviour. This PR makes `schema_inference_make_columns_nullable=0` work as before (no Nullable columns will be inferred) and introduces a new value `auto` for this setting that will make columns `Nullable` only if data has information about nullability. [#68298](https://github.com/ClickHouse/ClickHouse/pull/68298) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68721](https://github.com/ClickHouse/ClickHouse/issues/68721): Fixes [#50868](https://github.com/ClickHouse/ClickHouse/issues/50868). Small DateTime64 constant values returned by a nested subquery inside a distributed query were wrongly transformed to Nulls, thus causing errors and possible incorrect query results. [#68323](https://github.com/ClickHouse/ClickHouse/pull/68323) ([Shankar](https://github.com/shiyer7474)).
+* Backported in [#69029](https://github.com/ClickHouse/ClickHouse/issues/69029): Added back virtual columns `_table` and `_database` to distributed tables. They were available until version 24.3. [#68672](https://github.com/ClickHouse/ClickHouse/pull/68672) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#68864](https://github.com/ClickHouse/ClickHouse/issues/68864): Fix possible error `Size of permutation (0) is less than required (...)` during Variant column permutation. [#68681](https://github.com/ClickHouse/ClickHouse/pull/68681) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68854](https://github.com/ClickHouse/ClickHouse/issues/68854): Fix possible error `DB::Exception: Block structure mismatch in joined block stream: different columns:` with new JSON column. [#68686](https://github.com/ClickHouse/ClickHouse/pull/68686) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68790](https://github.com/ClickHouse/ClickHouse/issues/68790): Fix issue with materialized constant keys when hashing maps with arrays as keys in functions `sipHash(64/128)Keyed`. [#68731](https://github.com/ClickHouse/ClickHouse/pull/68731) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Backported in [#69108](https://github.com/ClickHouse/ClickHouse/issues/69108): TODO. [#68744](https://github.com/ClickHouse/ClickHouse/pull/68744) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#68850](https://github.com/ClickHouse/ClickHouse/issues/68850): Fix resolving dynamic subcolumns from subqueries in analyzer. [#68824](https://github.com/ClickHouse/ClickHouse/pull/68824) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68911](https://github.com/ClickHouse/ClickHouse/issues/68911): Fix complex types metadata parsing in DeltaLake. Closes [#68739](https://github.com/ClickHouse/ClickHouse/issues/68739). [#68836](https://github.com/ClickHouse/ClickHouse/pull/68836) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Backported in [#69160](https://github.com/ClickHouse/ClickHouse/issues/69160): Fix possible wrong result during anyHeavy state merge. [#68950](https://github.com/ClickHouse/ClickHouse/pull/68950) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#69072](https://github.com/ClickHouse/ClickHouse/issues/69072): Fixed writing to Materialized Views with enabled setting `optimize_functions_to_subcolumns`. [#68951](https://github.com/ClickHouse/ClickHouse/pull/68951) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#69016](https://github.com/ClickHouse/ClickHouse/issues/69016): Don't use serializations cache in const Dynamic column methods. It could lead to use-of-uninitialized value or even race condition during aggregations. [#68953](https://github.com/ClickHouse/ClickHouse/pull/68953) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#69120](https://github.com/ClickHouse/ClickHouse/issues/69120): Fix logical error when we have empty async insert. [#69080](https://github.com/ClickHouse/ClickHouse/pull/69080) ([Han Fei](https://github.com/hanfei1991)).
+
+#### NO CL CATEGORY
+
+* Backported in [#68947](https://github.com/ClickHouse/ClickHouse/issues/68947):. [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Backported in [#68704](https://github.com/ClickHouse/ClickHouse/issues/68704): Fix enumerating dynamic subcolumns. [#68582](https://github.com/ClickHouse/ClickHouse/pull/68582) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#69000](https://github.com/ClickHouse/ClickHouse/issues/69000): Prioritizing of virtual columns in hive partitioning. [#68606](https://github.com/ClickHouse/ClickHouse/pull/68606) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Backported in [#68799](https://github.com/ClickHouse/ClickHouse/issues/68799): CI: Disable SQLLogic job. [#68654](https://github.com/ClickHouse/ClickHouse/pull/68654) ([Max K.](https://github.com/maxknv)).
+* Backported in [#68834](https://github.com/ClickHouse/ClickHouse/issues/68834): Turn off fault injection for insert in `01396_inactive_replica_cleanup_nodes_zookeeper`. [#68715](https://github.com/ClickHouse/ClickHouse/pull/68715) ([alesapin](https://github.com/alesapin)).
+* Backported in [#68781](https://github.com/ClickHouse/ClickHouse/issues/68781): Fix flaky test 00989_parallel_parts_loading. [#68737](https://github.com/ClickHouse/ClickHouse/pull/68737) ([alesapin](https://github.com/alesapin)).
+* Backported in [#68762](https://github.com/ClickHouse/ClickHouse/issues/68762): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)).
+* Backported in [#68810](https://github.com/ClickHouse/ClickHouse/issues/68810): Try to disable rerun check if job triggered manually. [#68751](https://github.com/ClickHouse/ClickHouse/pull/68751) ([Max K.](https://github.com/maxknv)).
+* Backported in [#68962](https://github.com/ClickHouse/ClickHouse/issues/68962): Fix 2477 timeout. [#68752](https://github.com/ClickHouse/ClickHouse/pull/68752) ([jsc0218](https://github.com/jsc0218)).
+* Backported in [#68977](https://github.com/ClickHouse/ClickHouse/issues/68977): Check setting use_json_alias_for_old_object_type in runtime. [#68793](https://github.com/ClickHouse/ClickHouse/pull/68793) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68852](https://github.com/ClickHouse/ClickHouse/issues/68852): Make dynamic structure selection more consistent. [#68802](https://github.com/ClickHouse/ClickHouse/pull/68802) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#69052](https://github.com/ClickHouse/ClickHouse/issues/69052): Fix 01114_database_atomic flakiness. [#68930](https://github.com/ClickHouse/ClickHouse/pull/68930) ([Raúl Marín](https://github.com/Algunenano)).
+
--- a/docs/en/engines/table-engines/mergetree-family/annindexes.md
+++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md
@ -111,15 +111,16 @@ ANN indexes are built during column insertion and merge. As a result, `INSERT` a
 tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively when are far more read requests than write
 requests.

-ANN indexes support these queries:
+ANN indexes support this type of query:

-  ``` sql
-  SELECT *
-  FROM table
-  [WHERE ...]
-  ORDER BY Distance(vectors, Point)
-  LIMIT N
-  ```
+``` sql
+WITH [...] AS reference_vector
+SELECT *
+FROM table
+WHERE ...                       -- WHERE clause is optional
+ORDER BY Distance(vectors, reference_vector)
+LIMIT N
+```

 :::tip
 To avoid writing out large vectors, you can use [query
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@ -58,7 +58,7 @@ Connection: Close
 Content-Type: text/tab-separated-values; charset=UTF-8
 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
 X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
-X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}

 1
 ```
@ -472,7 +472,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
 < X-ClickHouse-Format: Template
 < X-ClickHouse-Timezone: Asia/Shanghai
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
 <
 # HELP "Query" "Number of executing queries"
 # TYPE "Query" counter
@ -668,7 +668,7 @@ $ curl -vv  -H 'XXX:xxx' 'http://localhost:8123/hi'
 < Content-Type: text/html; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
 <
 * Connection #0 to host localhost left intact
 Say Hi!%
@ -708,7 +708,7 @@ $ curl -v  -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
 < Content-Type: text/plain; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
 <
 * Connection #0 to host localhost left intact
 <html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -766,7 +766,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
 < Content-Type: text/html; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
 <
 <html><body>Absolute Path File</body></html>
 * Connection #0 to host localhost left intact
@ -785,7 +785,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
 < Content-Type: text/html; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
 <
 <html><body>Relative Path File</body></html>
 * Connection #0 to host localhost left intact
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -3226,7 +3226,7 @@ Default value: `0`.

 ## lightweight_deletes_sync {#lightweight_deletes_sync}

-The same as 'mutation_sync', but controls only execution of lightweight deletes.
+The same as [`mutations_sync`](#mutations_sync), but controls only execution of lightweight deletes.

 Possible values:

--- a/docs/en/operations/storing-data.md
+++ b/docs/en/operations/storing-data.md
@ -499,7 +499,7 @@ Required parameters:
 - `type` — `encrypted`. Otherwise the encrypted disk is not created.
 - `disk` — Type of disk for data storage.
 - `key` — The key for encryption and decryption. Type: [Uint64](/docs/en/sql-reference/data-types/int-uint.md). You can use `key_hex` parameter to encode the key in hexadecimal form.
-    You can specify multiple keys using the `id` attribute (see example above).
+    You can specify multiple keys using the `id` attribute (see example below).

 Optional parameters:

--- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md
+++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md
@ -104,7 +104,7 @@ Events that occur at the same second may lay in the sequence in an undefined ord

 **Parameters**

- `pattern` — Pattern string. See [Pattern syntax](#sequencematch).
+- `pattern` — Pattern string. See [Pattern syntax](#pattern-syntax).

 **Returned values**

@ -113,8 +113,7 @@ Events that occur at the same second may lay in the sequence in an undefined ord

 Type: `UInt8`.

-<a name="sequence-function-pattern-syntax"></a>
-**Pattern syntax**
+#### Pattern syntax

 - `(?N)` — Matches the condition argument at position `N`. Conditions are numbered in the `[1, 32]` range. For example, `(?1)` matches the argument passed to the `cond1` parameter.

@ -196,7 +195,7 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...)

 **Parameters**

- `pattern` — Pattern string. See [Pattern syntax](#sequencematch).
+- `pattern` — Pattern string. See [Pattern syntax](#pattern-syntax).

 **Returned values**

--- a/docs/en/sql-reference/aggregate-functions/reference/distinctdynamictypes.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/distinctdynamictypes.md
@ -0,0 +1,44 @@
+---
+slug: /en/sql-reference/aggregate-functions/reference/distinctdynamictypes
+sidebar_position: 215
+---
+
+# distinctDynamicTypes
+
+Calculates the list of distinct data types stored in a [Dynamic](../../data-types/dynamic.md) column.
+
+**Syntax**
+
+```sql
+distinctDynamicTypes(dynamic)
+```
+
+**Arguments**
+
+- `dynamic` — [Dynamic](../../data-types/dynamic.md) column.
+
+**Returned Value**
+
+- The sorted list of data type names [Array(String)](../../data-types/array.md).
+
+**Example**
+
+Query:
+
+```sql
+DROP TABLE IF EXISTS test_dynamic;
+CREATE TABLE test_dynamic(d Dynamic) ENGINE = Memory;
+INSERT INTO test_dynamic VALUES (42), (NULL), ('Hello'), ([1, 2, 3]), ('2020-01-01'), (map(1, 2)), (43), ([4, 5]), (NULL), ('World'), (map(3, 4))
+```
+
+```sql
+SELECT distinctDynamicTypes(d) FROM test_dynamic;
+```
+
+Result:
+
+```reference
+┌─distinctDynamicTypes(d)──────────────────────────────────────┐
+│ ['Array(Int64)','Date','Int64','Map(UInt8, UInt8)','String'] │
+└──────────────────────────────────────────────────────────────┘
+```
--- a/docs/en/sql-reference/aggregate-functions/reference/distinctjsonpaths.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/distinctjsonpaths.md
@ -0,0 +1,125 @@
+---
+slug: /en/sql-reference/aggregate-functions/reference/distinctjsonpaths
+sidebar_position: 216
+---
+
+# distinctJSONPaths
+
+Calculates the list of distinct paths stored in a [JSON](../../data-types/newjson.md) column.
+
+**Syntax**
+
+```sql
+distinctJSONPaths(json)
+```
+
+**Arguments**
+
+- `json` — [JSON](../../data-types/newjson.md) column.
+
+**Returned Value**
+
+- The sorted list of paths [Array(String)](../../data-types/array.md).
+
+**Example**
+
+Query:
+
+```sql
+DROP TABLE IF EXISTS test_json;
+CREATE TABLE test_json(json JSON) ENGINE = Memory;
+INSERT INTO test_json VALUES ('{"a" : 42, "b" : "Hello"}'), ('{"b" : [1, 2, 3], "c" : {"d" : {"e" : "2020-01-01"}}}'), ('{"a" : 43, "c" : {"d" : {"f" : [{"g" : 42}]}}}')
+```
+
+```sql
+SELECT distinctJSONPaths(json) FROM test_json;
+```
+
+Result:
+
+```reference
+┌─distinctJSONPaths(json)───┐
+│ ['a','b','c.d.e','c.d.f'] │
+└───────────────────────────┘
+```
+
+# distinctJSONPathsAndTypes
+
+Calculates the list of distinct paths and their types stored in a [JSON](../../data-types/newjson.md) column.
+
+**Syntax**
+
+```sql
+distinctJSONPathsAndTypes(json)
+```
+
+**Arguments**
+
+- `json` — [JSON](../../data-types/newjson.md) column.
+
+**Returned Value**
+
+- The sorted map of paths and types [Map(String, Array(String))](../../data-types/map.md).
+
+**Example**
+
+Query:
+
+```sql
+DROP TABLE IF EXISTS test_json;
+CREATE TABLE test_json(json JSON) ENGINE = Memory;
+INSERT INTO test_json VALUES ('{"a" : 42, "b" : "Hello"}'), ('{"b" : [1, 2, 3], "c" : {"d" : {"e" : "2020-01-01"}}}'), ('{"a" : 43, "c" : {"d" : {"f" : [{"g" : 42}]}}}')
+```
+
+```sql
+SELECT distinctJSONPathsAndTypes(json) FROM test_json;
+```
+
+Result:
+
+```reference
+┌─distinctJSONPathsAndTypes(json)───────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
+│ {'a':['Int64'],'b':['Array(Nullable(Int64))','String'],'c.d.e':['Date'],'c.d.f':['Array(JSON(max_dynamic_types=16, max_dynamic_paths=256))']} │
+└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+**Note**
+
+If the JSON declaration contains paths with specified types, these paths will always be included in the result of the `distinctJSONPaths/distinctJSONPathsAndTypes` functions even if the input data didn't have values for these paths.
+
+```sql
+DROP TABLE IF EXISTS test_json;
+CREATE TABLE test_json(json JSON(a UInt32)) ENGINE = Memory;
+INSERT INTO test_json VALUES ('{"b" : "Hello"}'), ('{"b" : "World", "c" : [1, 2, 3]}');
+```
+
+```sql
+SELECT json FROM test_json;
+```
+
+```text
+┌─json──────────────────────────────────┐
+│ {"a":0,"b":"Hello"}                   │
+│ {"a":0,"b":"World","c":["1","2","3"]} │
+└───────────────────────────────────────┘
+```
+
+```sql
+SELECT distinctJSONPaths(json) FROM test_json;
+```
+
+```text
+┌─distinctJSONPaths(json)─┐
+│ ['a','b','c']           │
+└─────────────────────────┘
+```
+
+```sql
+SELECT distinctJSONPathsAndTypes(json) FROM test_json;
+```
+
+```text
+┌─distinctJSONPathsAndTypes(json)────────────────────────────────┐
+│ {'a':['UInt32'],'b':['String'],'c':['Array(Nullable(Int64))']} │
+└────────────────────────────────────────────────────────────────┘
+```
--- a/docs/en/sql-reference/data-types/newjson.md
+++ b/docs/en/sql-reference/data-types/newjson.md
@ -453,8 +453,8 @@ As we can see, after inserting paths `e` and `f.g` the limit was reached and we

 ### During merges of data parts in MergeTree table engines

-During merge of several data parts in MergeTree table the `JSON` column in the resulting data part can reach the limit of dynamic paths won't be able to store all paths from source parts as subcolumns.
-In this case ClickHouse chooses what paths will remain as subcolumns after merge and what types will be stored in the shared data structure. In most cases ClickHouse tries to keep paths that contains
+During merge of several data parts in MergeTree table the `JSON` column in the resulting data part can reach the limit of dynamic paths and won't be able to store all paths from source parts as subcolumns.
+In this case ClickHouse chooses what paths will remain as subcolumns after merge and what paths will be stored in the shared data structure. In most cases ClickHouse tries to keep paths that contain
 the largest number of non-null values and move the rarest paths to the shared data structure, but it depends on the implementation.

 Let's see an example of such merge. First, let's create a table with `JSON` column, set the limit of dynamic paths to `3` and insert values with `5` different paths:
@ -505,7 +505,130 @@ As we can see, ClickHouse kept the most frequent paths `a`, `b` and `c` and move

 ## Introspection functions

-There are several functions that can help to inspect the content of the JSON column: [JSONAllPaths](../functions/json-functions.md#jsonallpaths), [JSONAllPathsWithTypes](../functions/json-functions.md#jsonallpathswithtypes), [JSONDynamicPaths](../functions/json-functions.md#jsondynamicpaths), [JSONDynamicPathsWithTypes](../functions/json-functions.md#jsondynamicpathswithtypes), [JSONSharedDataPaths](../functions/json-functions.md#jsonshareddatapaths), [JSONSharedDataPathsWithTypes](../functions/json-functions.md#jsonshareddatapathswithtypes).
+There are several functions that can help to inspect the content of the JSON column: [JSONAllPaths](../functions/json-functions.md#jsonallpaths), [JSONAllPathsWithTypes](../functions/json-functions.md#jsonallpathswithtypes), [JSONDynamicPaths](../functions/json-functions.md#jsondynamicpaths), [JSONDynamicPathsWithTypes](../functions/json-functions.md#jsondynamicpathswithtypes), [JSONSharedDataPaths](../functions/json-functions.md#jsonshareddatapaths), [JSONSharedDataPathsWithTypes](../functions/json-functions.md#jsonshareddatapathswithtypes), [distinctDynamicTypes](../aggregate-functions/reference/distinctdynamictypes.md), [distinctJSONPaths and distinctJSONPathsAndTypes](../aggregate-functions/reference/distinctjsonpaths.md).
+
+**Examples**
+
+Let's investigate the content of [GH Archive](https://www.gharchive.org/) dataset for `2020-01-01` date:
+
+```sql
+SELECT arrayJoin(distinctJSONPaths(json)) FROM s3('s3://clickhouse-public-datasets/gharchive/original/2020-01-01-*.json.gz', JSONAsObject) 
+```
+
+```text
+┌─arrayJoin(distinctJSONPaths(json))─────────────────────────┐
+│ actor.avatar_url                                           │
+│ actor.display_login                                        │
+│ actor.gravatar_id                                          │
+│ actor.id                                                   │
+│ actor.login                                                │
+│ actor.url                                                  │
+│ created_at                                                 │
+│ id                                                         │
+│ org.avatar_url                                             │
+│ org.gravatar_id                                            │
+│ org.id                                                     │
+│ org.login                                                  │
+│ org.url                                                    │
+│ payload.action                                             │
+│ payload.before                                             │
+│ payload.comment._links.html.href                           │
+│ payload.comment._links.pull_request.href                   │
+│ payload.comment._links.self.href                           │
+│ payload.comment.author_association                         │
+│ payload.comment.body                                       │
+│ payload.comment.commit_id                                  │
+│ payload.comment.created_at                                 │
+│ payload.comment.diff_hunk                                  │
+│ payload.comment.html_url                                   │
+│ payload.comment.id                                         │
+│ payload.comment.in_reply_to_id                             │
+│ payload.comment.issue_url                                  │
+│ payload.comment.line                                       │
+│ payload.comment.node_id                                    │
+│ payload.comment.original_commit_id                         │
+│ payload.comment.original_position                          │
+│ payload.comment.path                                       │
+│ payload.comment.position                                   │
+│ payload.comment.pull_request_review_id                     │
+...
+│ payload.release.node_id                                    │
+│ payload.release.prerelease                                 │
+│ payload.release.published_at                               │
+│ payload.release.tag_name                                   │
+│ payload.release.tarball_url                                │
+│ payload.release.target_commitish                           │
+│ payload.release.upload_url                                 │
+│ payload.release.url                                        │
+│ payload.release.zipball_url                                │
+│ payload.size                                               │
+│ public                                                     │
+│ repo.id                                                    │
+│ repo.name                                                  │
+│ repo.url                                                   │
+│ type                                                       │
+└─arrayJoin(distinctJSONPaths(json))─────────────────────────┘
+```
+
+```sql
+SELECT arrayJoin(distinctJSONPathsAndTypes(json)) FROM s3('s3://clickhouse-public-datasets/gharchive/original/2020-01-01-*.json.gz', JSONAsObject) SETTINGS date_time_input_format='best_effort'
+```
+
+
+```text
+┌─arrayJoin(distinctJSONPathsAndTypes(json))──────────────────┐
+│ ('actor.avatar_url',['String'])                             │
+│ ('actor.display_login',['String'])                          │
+│ ('actor.gravatar_id',['String'])                            │
+│ ('actor.id',['Int64'])                                      │
+│ ('actor.login',['String'])                                  │
+│ ('actor.url',['String'])                                    │
+│ ('created_at',['DateTime'])                                 │
+│ ('id',['String'])                                           │
+│ ('org.avatar_url',['String'])                               │
+│ ('org.gravatar_id',['String'])                              │
+│ ('org.id',['Int64'])                                        │
+│ ('org.login',['String'])                                    │
+│ ('org.url',['String'])                                      │
+│ ('payload.action',['String'])                               │
+│ ('payload.before',['String'])                               │
+│ ('payload.comment._links.html.href',['String'])             │
+│ ('payload.comment._links.pull_request.href',['String'])     │
+│ ('payload.comment._links.self.href',['String'])             │
+│ ('payload.comment.author_association',['String'])           │
+│ ('payload.comment.body',['String'])                         │
+│ ('payload.comment.commit_id',['String'])                    │
+│ ('payload.comment.created_at',['DateTime'])                 │
+│ ('payload.comment.diff_hunk',['String'])                    │
+│ ('payload.comment.html_url',['String'])                     │
+│ ('payload.comment.id',['Int64'])                            │
+│ ('payload.comment.in_reply_to_id',['Int64'])                │
+│ ('payload.comment.issue_url',['String'])                    │
+│ ('payload.comment.line',['Int64'])                          │
+│ ('payload.comment.node_id',['String'])                      │
+│ ('payload.comment.original_commit_id',['String'])           │
+│ ('payload.comment.original_position',['Int64'])             │
+│ ('payload.comment.path',['String'])                         │
+│ ('payload.comment.position',['Int64'])                      │
+│ ('payload.comment.pull_request_review_id',['Int64'])        │
+...
+│ ('payload.release.node_id',['String'])                      │
+│ ('payload.release.prerelease',['Bool'])                     │
+│ ('payload.release.published_at',['DateTime'])               │
+│ ('payload.release.tag_name',['String'])                     │
+│ ('payload.release.tarball_url',['String'])                  │
+│ ('payload.release.target_commitish',['String'])             │
+│ ('payload.release.upload_url',['String'])                   │
+│ ('payload.release.url',['String'])                          │
+│ ('payload.release.zipball_url',['String'])                  │
+│ ('payload.size',['Int64'])                                  │
+│ ('public',['Bool'])                                         │
+│ ('repo.id',['Int64'])                                       │
+│ ('repo.name',['String'])                                    │
+│ ('repo.url',['String'])                                     │
+│ ('type',['String'])                                         │
+└─arrayJoin(distinctJSONPathsAndTypes(json))──────────────────┘
+```

 ## Tips for better usage of the JSON type

--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@ -2035,6 +2035,7 @@ Query:
 SELECT arrayZip(['a', 'b', 'c'], [5, 2, 1]);
 ```

+
 Result:

 ``` text
@ -2043,6 +2044,43 @@ Result:
 └──────────────────────────────────────┘
 ```

+## arrayZipUnaligned
+
+Combines multiple arrays into a single array, allowing for unaligned arrays (arrays of different sizes). The resulting array contains the corresponding elements of the source arrays grouped into tuples in the listed order of arguments.
+
+**Syntax**
+
+``` sql
+arrayZipUnaligned(arr1, arr2, ..., arrN)
+```
+
+**Arguments**
+
+- `arrN` — [Array](../data-types/array.md).
+
+The function can take any number of arrays of different types.
+
+**Returned value**
+
+- Array with elements from the source arrays grouped into [tuples](../data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. [Array](../data-types/array.md). If the arrays have different sizes, the shorter arrays are padded with `NULL` values.
+
+**Example**
+
+Query:
+
+``` sql
+SELECT arrayZipUnaligned(['a'], [1, 2, 3]);
+```
+
+Result:
+
+``` text
+┌─arrayZipUnaligned(['a'], [1, 2, 3])─┐
+│ [('a',1),(NULL,2),(NULL,3)]         │
+└─────────────────────────────────────┘
+```
+
+
 ## arrayAUC

 Calculate AUC (Area Under the Curve, which is a concept in machine learning, see more details: <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve>).
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@ -2019,7 +2019,7 @@ Alias: `dateTrunc`.

    `unit` argument is case-insensitive.

- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md).
+- `value` — Date and time. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md).
 - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md).

 **Returned value**
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@ -49,6 +49,55 @@ SETTINGS cast_keep_nullable = 1
 └──────────────────┴─────────────────────┴──────────────────┘
 ```

+## toBool
+
+Converts an input value to a value of type [`Bool`](../data-types/boolean.md). Throws an exception in case of an error.
+
+**Syntax**
+
+```sql
+toBool(expr)
+```
+
+**Arguments**
+
+- `expr` — Expression returning a number or a string. [Expression](../syntax.md/#syntax-expressions).
+
+Supported arguments:
+- Values of type (U)Int8/16/32/64/128/256.
+- Values of type Float32/64.
+- Strings `true` or `false` (case-insensitive).
+
+**Returned value**
+
+- Returns `true` or `false` based on evaluation of the argument. [Bool](../data-types/boolean.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toBool(toUInt8(1)),
+    toBool(toInt8(-1)),
+    toBool(toFloat32(1.01)),
+    toBool('true'),
+    toBool('false'),
+    toBool('FALSE')
+FORMAT Vertical
+```
+
+Result:
+
+```response
+toBool(toUInt8(1)):      true
+toBool(toInt8(-1)):      true
+toBool(toFloat32(1.01)): true
+toBool('true'):          true
+toBool('false'):         false
+toBool('FALSE'):         false
+```
+
 ## toInt8

 Converts an input value to a value of type [`Int8`](../data-types/int-uint.md). Throws an exception in case of an error.
--- a/docs/en/sql-reference/operators/index.md
+++ b/docs/en/sql-reference/operators/index.md
@ -265,8 +265,6 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL '4' day + INTERV
 └─────────────────────┴────────────────────────────────────────────────────────────┘
 ```

-You can work with dates without using `INTERVAL`, just by adding or subtracting seconds, minutes, and hours. For example, an interval of one day can be set by adding `60*60*24`.
-
 :::note    
 The `INTERVAL` syntax or `addDays` function are always preferred. Simple addition or subtraction (syntax like `now() + ...`) doesn't consider time settings. For example, daylight saving time.
 :::
--- a/docs/en/sql-reference/statements/delete.md
+++ b/docs/en/sql-reference/statements/delete.md
@ -10,10 +10,10 @@ title: The Lightweight DELETE Statement
 The lightweight `DELETE` statement removes rows from the table `[db.]table` that match the expression `expr`. It is only available for the *MergeTree table engine family.

 ``` sql
-DELETE FROM [db.]table [ON CLUSTER cluster] WHERE expr;
+DELETE FROM [db.]table [ON CLUSTER cluster] [IN PARTITION partition_expr] WHERE expr;
 ```

-It is called "lightweight `DELETE`" to contrast it to the [ALTER table DELETE](/en/sql-reference/statements/alter/delete) command, which is a heavyweight process.
+It is called "lightweight `DELETE`" to contrast it to the [ALTER TABLE ... DELETE](/en/sql-reference/statements/alter/delete) command, which is a heavyweight process.

 ## Examples

@ -22,23 +22,25 @@ It is called "lightweight `DELETE`" to contrast it to the [ALTER table DELETE](/
 DELETE FROM hits WHERE Title LIKE '%hello%';
 ```

-## Lightweight `DELETE` does not delete data from storage immediately
+## Lightweight `DELETE` does not delete data immediately

-With lightweight `DELETE`, deleted rows are internally marked as deleted immediately and will be automatically filtered out of all subsequent queries. However, cleanup of data happens during the next merge. As a result, it is possible that for an unspecified period, data is not actually deleted from storage and is only marked as deleted.
+Lightweight `DELETE` is implemented as a [mutation](/en/sql-reference/statements/alter#mutations), which is executed asynchronously in the background by default. The statement is going to return almost immediately, but the data can still be visible to queries until the mutation is finished.

-If you need to guarantee that your data is deleted from storage in a predictable time, consider using the [ALTER table DELETE](/en/sql-reference/statements/alter/delete) command. Note that deleting data using `ALTER table DELETE` may consume significant resources as it recreates all affected parts.
+The mutation marks rows as deleted, and at that point, they will no longer show up in query results. It does not physically delete the data; that happens during the next merge. As a result, it is possible that for an unspecified period, data is not actually deleted from storage and is only marked as deleted.
+
+If you need to guarantee that your data is deleted from storage in a predictable time, consider using the table setting [`min_age_to_force_merge_seconds`](https://clickhouse.com/docs/en/operations/settings/merge-tree-settings#min_age_to_force_merge_seconds). Or you can use the [ALTER TABLE ... DELETE](/en/sql-reference/statements/alter/delete) command. Note that deleting data using `ALTER TABLE ... DELETE` may consume significant resources as it recreates all affected parts.

 ## Deleting large amounts of data

 Large deletes can negatively affect ClickHouse performance. If you are attempting to delete all rows from a table, consider using the [`TRUNCATE TABLE`](/en/sql-reference/statements/truncate) command.

-If you anticipate frequent deletes, consider using a [custom partitioning key](/en/engines/table-engines/mergetree-family/custom-partitioning-key). You can then use the [`ALTER TABLE...DROP PARTITION`](/en/sql-reference/statements/alter/partition#drop-partitionpart) command to quickly drop all rows associated with that partition.
+If you anticipate frequent deletes, consider using a [custom partitioning key](/en/engines/table-engines/mergetree-family/custom-partitioning-key). You can then use the [`ALTER TABLE ... DROP PARTITION`](/en/sql-reference/statements/alter/partition#drop-partitionpart) command to quickly drop all rows associated with that partition.

 ## Limitations of lightweight `DELETE`

 ### Lightweight `DELETE`s with projections

-By default, `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation. But there is a [MergeTree setting](https://clickhouse.com/docs/en/operations/settings/merge-tree-settings) `lightweight_mutation_projection_mode` can change the behavior.
+By default, `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation. But there is a [MergeTree setting](https://clickhouse.com/docs/en/operations/settings/merge-tree-settings) `lightweight_mutation_projection_mode` to change the behavior.

 ## Performance considerations when using lightweight `DELETE`

@ -48,7 +50,7 @@ The following can also negatively impact lightweight `DELETE` performance:

 - A heavy `WHERE` condition in a `DELETE` query.
 - If the mutations queue is filled with many other mutations, this can possibly lead to performance issues as all mutations on a table are executed sequentially.
- The affected table having a very large number of data parts.
+- The affected table has a very large number of data parts.
 - Having a lot of data in compact parts. In a Compact part, all columns are stored in one file.

 ## Delete permissions
@ -61,31 +63,31 @@ GRANT ALTER DELETE ON db.table to username;

 ## How lightweight DELETEs work internally in ClickHouse

-1. A "mask" is applied to affected rows
+1. **A "mask" is applied to affected rows**

-When a `DELETE FROM table ...` query is executed, ClickHouse saves a mask where each row is marked as either “existing” or as “deleted”. Those “deleted” rows are omitted for subsequent queries. However, rows are actually only removed later by subsequent merges. Writing this mask is much more lightweight than what is done by an `ALTER table DELETE` query.
+   When a `DELETE FROM table ...` query is executed, ClickHouse saves a mask where each row is marked as either “existing” or as “deleted”. Those “deleted” rows are omitted for subsequent queries. However, rows are actually only removed later by subsequent merges. Writing this mask is much more lightweight than what is done by an `ALTER TABLE ... DELETE` query.

-The mask is implemented as a hidden `_row_exists` system column that stores `True` for all visible rows and `False` for deleted ones. This column is only present in a part if some rows in the part were deleted. This column does not exist when a part has all values equal to `True`.
+   The mask is implemented as a hidden `_row_exists` system column that stores `True` for all visible rows and `False` for deleted ones. This column is only present in a part if some rows in the part were deleted. This column does not exist when a part has all values equal to `True`.

-2. `SELECT` queries are transformed to include the mask
+2. **`SELECT` queries are transformed to include the mask**

-When a masked column is used in a query, the `SELECT ... FROM table WHERE condition` query internally is extended by the predicate on `_row_exists` and is transformed to:
-```sql
-SELECT ... FROM table PREWHERE _row_exists WHERE condition
-```
-At execution time, the column `_row_exists` is read to determine which rows should not be returned. If there are many deleted rows, ClickHouse can determine which granules can be fully skipped when reading the rest of the columns.
+   When a masked column is used in a query, the `SELECT ... FROM table WHERE condition` query internally is extended by the predicate on `_row_exists` and is transformed to:
+   ```sql
+   SELECT ... FROM table PREWHERE _row_exists WHERE condition
+   ```
+   At execution time, the column `_row_exists` is read to determine which rows should not be returned. If there are many deleted rows, ClickHouse can determine which granules can be fully skipped when reading the rest of the columns.

-3. `DELETE` queries are transformed to `ALTER table UPDATE` queries
+3. **`DELETE` queries are transformed to `ALTER TABLE ... UPDATE` queries**

-The `DELETE FROM table WHERE condition` is translated into an `ALTER table UPDATE _row_exists = 0 WHERE condition` mutation.
+   The `DELETE FROM table WHERE condition` is translated into an `ALTER TABLE table UPDATE _row_exists = 0 WHERE condition` mutation.

-Internally, this mutation is executed in two steps:
+   Internally, this mutation is executed in two steps:

-1. A `SELECT count() FROM table WHERE condition` command is executed for each individual part to determine if the part is affected.
+   1. A `SELECT count() FROM table WHERE condition` command is executed for each individual part to determine if the part is affected.

-2. Based on the commands above, affected parts are then mutated, and hardlinks are created for unaffected parts. In the case of wide parts, the `_row_exists` column for each row is updated and all other columns' files are hardlinked. For compact parts, all columns are re-written because they are all stored together in one file.
+   2. Based on the commands above, affected parts are then mutated, and hardlinks are created for unaffected parts. In the case of wide parts, the `_row_exists` column for each row is updated, and all other columns' files are hardlinked. For compact parts, all columns are re-written because they are all stored together in one file.

-From the steps above, we can see that lightweight deletes using the masking technique improves performance over traditional `ALTER table DELETE` commands because `ALTER table DELETE` reads and re-writes all the columns' files for affected parts.
+   From the steps above, we can see that lightweight `DELETE` using the masking technique improves performance over traditional `ALTER TABLE ... DELETE` because it does not re-write all the columns' files for affected parts.

 ## Related content

--- a/docs/en/sql-reference/transactions.md
+++ b/docs/en/sql-reference/transactions.md
@ -8,14 +8,14 @@ slug: /en/guides/developer/transactional
 This is transactional (ACID) if the inserted rows are packed and inserted as a single block (see Notes):
 - Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted.
 - Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted.
- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen
+- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen. Clients inside of another transaction have [snapshot isolation](https://en.wikipedia.org/wiki/Snapshot_isolation), while clients outside of a transaction have [read uncommitted](https://en.wikipedia.org/wiki/Isolation_(database_systems)#Read_uncommitted) isolation level.
 - Durable: a successful INSERT is written to the filesystem before answering to the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
 - INSERT into multiple tables with one statement is possible if materialized views are involved (the INSERT from the client is to a table which has associate materialized views).

 ## Case 2: INSERT into multiple partitions, of one table, of the MergeTree* family

 Same as Case 1 above, with this detail:
- If table has many partitions and INSERT covers many partitions–then insertion into every partition is transactional on its own
+- If table has many partitions and INSERT covers many partitions, then insertion into every partition is transactional on its own


 ## Case 3: INSERT into one distributed table of the MergeTree* family
@ -38,7 +38,7 @@ Same as Case 1 above, with this detail:
  - the insert format is column-based (like Native, Parquet, ORC, etc) and the data contains only one block of data
 - the size of the inserted block in general may depend on many settings (for example: `max_block_size`, `max_insert_block_size`, `min_insert_block_size_rows`, `min_insert_block_size_bytes`, `preferred_block_size_bytes`, etc)
 - if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties
- ClickHouse is using MVCC with snapshot isolation internally
+- ClickHouse is using [MVCC](https://en.wikipedia.org/wiki/Multiversion_concurrency_control) with [snapshot isolation](https://en.wikipedia.org/wiki/Snapshot_isolation) internally for concurrent transactions
 - all ACID properties are valid even in the case of server kill/crash
 - either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in the typical setup
 - "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
@ -260,7 +260,7 @@ FROM mergetree_table
 ### Transactions introspection

 You can inspect transactions by querying the `system.transactions` table, but note that you cannot query that
-table from a session that is in a transaction–open a second `clickhouse client` session to query that table.
+table from a session that is in a transaction. Open a second `clickhouse client` session to query that table.

 ```sql
 SELECT *
--- a/docs/ru/interfaces/http.md
+++ b/docs/ru/interfaces/http.md
@ -50,7 +50,7 @@ Connection: Close
 Content-Type: text/tab-separated-values; charset=UTF-8
 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
 X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
-X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}

 1
 ```
@ -367,7 +367,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
 < X-ClickHouse-Format: Template
 < X-ClickHouse-Timezone: Asia/Shanghai
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}
 <
 # HELP "Query" "Number of executing queries"
 # TYPE "Query" counter
@ -601,7 +601,7 @@ $ curl -v  -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
 < Content-Type: text/plain; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}
 <
 * Connection #0 to host localhost left intact
 <html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -659,7 +659,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
 < Content-Type: text/html; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}
 <
 <html><body>Absolute Path File</body></html>
 * Connection #0 to host localhost left intact
@ -678,7 +678,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
 < Content-Type: text/html; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}
 <
 <html><body>Relative Path File</body></html>
 * Connection #0 to host localhost left intact
--- a/docs/zh/interfaces/http.md
+++ b/docs/zh/interfaces/http.md
@ -53,7 +53,7 @@ Connection: Close
 Content-Type: text/tab-separated-values; charset=UTF-8
 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
 X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
-X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}

 1
 ```
@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
 < X-ClickHouse-Format: Template
 < X-ClickHouse-Timezone: Asia/Shanghai
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}
 <
 # HELP "Query" "Number of executing queries"
 # TYPE "Query" counter
@ -524,7 +524,7 @@ $ curl -vv  -H 'XXX:xxx' 'http://localhost:8123/hi'
 < Content-Type: text/html; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}
 <
 * Connection #0 to host localhost left intact
 Say Hi!%
@ -564,7 +564,7 @@ $ curl -v  -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
 < Content-Type: text/plain; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}
 <
 * Connection #0 to host localhost left intact
 <html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -616,7 +616,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
 < Content-Type: text/html; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}
 <
 <html><body>Absolute Path File</body></html>
 * Connection #0 to host localhost left intact
@ -635,7 +635,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
 < Content-Type: text/html; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=10
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}
 <
 <html><body>Relative Path File</body></html>
 * Connection #0 to host localhost left intact
--- a/programs/keeper-client/Commands.cpp
+++ b/programs/keeper-client/Commands.cpp
@ -677,4 +677,122 @@ void GetAllChildrenNumberCommand::execute(const ASTKeeperQuery * query, KeeperCl
    std::cout << totalNumChildren << "\n";
 }

+namespace
+{
+
+/// One `cp`/`mv` request against Keeper that can be re-attempted by the caller.
+///
+/// Each call to perform() reads `src`, then submits a single multi-request that
+/// checks the version of `src`, creates `dest` with the data just read and, when
+/// `remove_src` is set, removes `src`. A ZBADVERSION reply is counted as a failed
+/// try and leaves the operation incomplete, so the caller is expected to loop on
+/// isTryLimitReached() / isCompleted().
+class CPMVOperation
+{
+    /// Maximum number of ZBADVERSION failures tolerated before the error is reported.
+    constexpr static UInt64 kTryLimit = 1000;
+
+public:
+    CPMVOperation(String src_, String dest_, bool remove_src_, KeeperClient * client_)
+        : src(std::move(src_))
+        , dest(std::move(dest_))
+        , remove_src(remove_src_)
+        , client(client_)
+    {
+    }
+
+    /// True once the number of failed tries has reached `kTryLimit`.
+    bool isTryLimitReached() const { return kTryLimit <= failed_tries_count; }
+
+    /// True after a perform() call whose multi-request returned ZOK.
+    bool isCompleted() const { return is_completed; }
+
+    /// Makes one attempt. Returns normally on success (marking the operation completed)
+    /// and on a ZBADVERSION reply while the try limit is not yet reached; every other
+    /// outcome is handed to KeeperMultiException::check, which is expected to throw.
+    void perform()
+    {
+        Coordination::Stat source_stat;
+        const String payload = client->zookeeper->get(src, &source_stat);
+        const auto expected_version = source_stat.version;
+
+        /// The version check pins the multi-request to the state of `src` that was just read.
+        Coordination::Requests requests;
+        requests.push_back(zkutil::makeCheckRequest(src, expected_version));
+        requests.push_back(zkutil::makeCreateRequest(dest, payload, zkutil::CreateMode::Persistent)); // Do we need to copy ACLs here?
+        if (remove_src)
+            requests.push_back(zkutil::makeRemoveRequest(src, expected_version));
+
+        Coordination::Responses responses;
+        const auto code = client->zookeeper->tryMulti(requests, responses);
+
+        if (code == Coordination::Error::ZOK)
+        {
+            is_completed = true;
+            return;
+        }
+
+        if (code == Coordination::Error::ZBADVERSION)
+        {
+            /// `src` changed between the read and the multi-request: count the failure
+            /// and report it only when no tries are left.
+            ++failed_tries_count;
+
+            if (isTryLimitReached())
+                zkutil::KeeperMultiException::check(code, requests, responses);
+
+            return;
+        }
+
+        /// Any other error code is not retried.
+        zkutil::KeeperMultiException::check(code, requests, responses);
+
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable");
+    }
+
+private:
+    String src;
+    String dest;
+    bool remove_src = false;
+    KeeperClient * client = nullptr;
+
+    bool is_completed = false;
+    uint64_t failed_tries_count = 0;
+};
+
+}
+
+/// Parses the arguments of `cp`: two Keeper paths, `<src>` followed by `<dest>`.
+/// Each successfully parsed path is appended to `node->args` in that order.
+/// Returns false as soon as one of the paths cannot be parsed; a source path that
+/// was already parsed stays in `node->args` in that case.
+bool CPCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
+{
+    String src_path;
+    if (!parseKeeperPath(pos, expected, src_path))
+        return false;
+    node->args.push_back(std::move(src_path));
+
+    String to_path;
+    if (!parseKeeperPath(pos, expected, to_path))
+        return false;
+    node->args.push_back(std::move(to_path));
+
+    return true;
+}
+
+/// Runs `cp <src> <dest>`: both arguments are passed through `client->getAbsolutePath`
+/// and the copy is attempted until it completes or the operation's try limit is reached.
+void CPCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
+{
+    auto source_path = client->getAbsolutePath(query->args[0].safeGet<String>());
+    auto target_path = client->getAbsolutePath(query->args[1].safeGet<String>());
+
+    /// `remove_src_ = false` keeps the source node, which is what makes this a copy.
+    CPMVOperation copy_operation(std::move(source_path), std::move(target_path), /*remove_src_=*/false, /*client_=*/client);
+
+    while (!(copy_operation.isTryLimitReached() || copy_operation.isCompleted()))
+        copy_operation.perform();
+}
+
+/// Parses the arguments of `mv`: two Keeper paths, `<src>` followed by `<dest>`.
+/// Each successfully parsed path is appended to `node->args` in that order; parsing
+/// stops with `false` at the first path that cannot be parsed.
+bool MVCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
+{
+    /// <src> and <dest>.
+    constexpr size_t path_arguments_count = 2;
+
+    for (size_t argument_index = 0; argument_index < path_arguments_count; ++argument_index)
+    {
+        String path;
+        if (!parseKeeperPath(pos, expected, path))
+            return false;
+        node->args.push_back(std::move(path));
+    }
+
+    return true;
+}
+
+/// Runs `mv <src> <dest>`: both arguments are passed through `client->getAbsolutePath`
+/// and the move is attempted until it completes or the operation's try limit is reached.
+void MVCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
+{
+    auto source_path = client->getAbsolutePath(query->args[0].safeGet<String>());
+    auto target_path = client->getAbsolutePath(query->args[1].safeGet<String>());
+
+    /// `remove_src_ = true` removes the source node in the same request, which is what makes this a move.
+    CPMVOperation move_operation(std::move(source_path), std::move(target_path), /*remove_src_=*/true, /*client_=*/client);
+
+    while (!(move_operation.isTryLimitReached() || move_operation.isCompleted()))
+        move_operation.perform();
+}
+
 }
--- a/programs/keeper-client/Commands.h
+++ b/programs/keeper-client/Commands.h
@ -266,4 +266,32 @@ class GetAllChildrenNumberCommand : public IKeeperClientCommand
    }
 };

+/// Keeper client command `cp <src> <dest>`.
+/// parse() and execute() are only declared here; they are defined out of line.
+class CPCommand : public IKeeperClientCommand
+{
+    String getName() const override
+    {
+        return "cp";
+    }
+
+    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
+
+    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
+
+    /// NOTE(review): the leading `{}` is presumably substituted with the command name by the caller - confirm.
+    String getHelpMessage() const override { return "{} <src> <dest> -- Copies 'src' node to 'dest' path."; }
+};
+
+/// Keeper client command `mv <src> <dest>`.
+/// parse() and execute() are only declared here; they are defined out of line.
+class MVCommand : public IKeeperClientCommand
+{
+    String getName() const override
+    {
+        return "mv";
+    }
+
+    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
+
+    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
+
+    /// NOTE(review): the leading `{}` is presumably substituted with the command name by the caller - confirm.
+    String getHelpMessage() const override { return "{} <src> <dest> -- Moves 'src' node to the 'dest' path."; }
+};
+
 }
--- a/programs/keeper-client/KeeperClient.cpp
+++ b/programs/keeper-client/KeeperClient.cpp
@ -212,6 +212,8 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
        std::make_shared<FourLetterWordCommand>(),
        std::make_shared<GetDirectChildrenNumberCommand>(),
        std::make_shared<GetAllChildrenNumberCommand>(),
+        std::make_shared<CPCommand>(),
+        std::make_shared<MVCommand>(),
    });

    String home_path;
--- a/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp
+++ b/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp
@ -68,7 +68,10 @@ public:
        if (data().isEqualTo(to.data()))
            counter += to.counter;
        else if (!data().has() || counter < to.counter)
+        {
            data().set(to.data(), arena);
+            counter = to.counter - counter;
+        }
        else
            counter -= to.counter;
    }
--- a/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp
+++ b/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp
@ -0,0 +1,161 @@
+#include <unordered_set>
+#include <IO/WriteHelpers.h>
+#include <IO/ReadHelpers.h>
+
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypesBinaryEncoding.h>
+#include <Columns/ColumnDynamic.h>
+
+#include <AggregateFunctions/IAggregateFunction.h>
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/FactoryHelpers.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int TOO_LARGE_ARRAY_SIZE;
+}
+
+/// Aggregation state of distinctDynamicTypes: the set of type names collected so far.
+struct AggregateFunctionDistinctDynamicTypesData
+{
+    /// Largest element count that deserialize() accepts.
+    constexpr static size_t MAX_ARRAY_SIZE = 0xFFFFFF;
+
+    std::unordered_set<String> data;
+
+    /// Records one type name; the set drops duplicates.
+    void add(const String & type)
+    {
+        data.insert(type);
+    }
+
+    /// Unions the type names of `other` into this state.
+    void merge(const AggregateFunctionDistinctDynamicTypesData & other)
+    {
+        for (const auto & type_name : other.data)
+            data.insert(type_name);
+    }
+
+    /// Writes the element count as a varint followed by every name as a binary string.
+    void serialize(WriteBuffer & buf) const
+    {
+        writeVarUInt(data.size(), buf);
+        for (const auto & type_name : data)
+            writeStringBinary(type_name, buf);
+    }
+
+    /// Reads what serialize() wrote. Throws TOO_LARGE_ARRAY_SIZE if the announced count exceeds MAX_ARRAY_SIZE.
+    void deserialize(ReadBuffer & buf)
+    {
+        size_t count;
+        readVarUInt(count, buf);
+        if (count > MAX_ARRAY_SIZE)
+            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {}): {}", MAX_ARRAY_SIZE, count);
+
+        data.reserve(count);
+        String type_name;
+        for (size_t remaining = count; remaining != 0; --remaining)
+        {
+            readStringBinary(type_name, buf);
+            data.insert(type_name);
+        }
+    }
+
+    /// Appends one array row with all collected names to `column`, which must be an array of strings.
+    void insertResultInto(IColumn & column)
+    {
+        auto & result_array = assert_cast<ColumnArray &>(column);
+        auto & result_strings = assert_cast<ColumnString &>(result_array.getData());
+
+        /// The set is unordered, so sort a copy to make the output stable and readable.
+        std::vector<String> ordered_names(data.begin(), data.end());
+        std::sort(ordered_names.begin(), ordered_names.end());
+
+        for (const auto & type_name : ordered_names)
+            result_strings.insertData(type_name.data(), type_name.size());
+
+        /// Close the array row: its end offset is the total number of nested strings.
+        result_array.getOffsets().push_back(result_strings.size());
+    }
+};
+
+/// Aggregate function distinctDynamicTypes: gathers the distinct names of the data types
+/// stored in a Dynamic column and returns them as Array(String).
+class AggregateFunctionDistinctDynamicTypes final : public IAggregateFunctionDataHelper<AggregateFunctionDistinctDynamicTypesData, AggregateFunctionDistinctDynamicTypes>
+{
+public:
+    explicit AggregateFunctionDistinctDynamicTypes(const DataTypes & argument_types_)
+        : IAggregateFunctionDataHelper<AggregateFunctionDistinctDynamicTypesData, AggregateFunctionDistinctDynamicTypes>(
+            argument_types_, {}, std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()))
+    {
+    }
+
+    String getName() const override
+    {
+        return "distinctDynamicTypes";
+    }
+
+    bool allocatesMemoryInArena() const override
+    {
+        return false;
+    }
+
+    /// Adds the type name of a single row; NULL rows contribute nothing.
+    void ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
+    {
+        const auto & source = assert_cast<const ColumnDynamic & >(*columns[0]);
+        if (!source.isNullAt(row_num))
+            data(place).add(source.getTypeNameAt(row_num));
+    }
+
+    /// Adds rows [row_begin, row_end) into one state.
+    void ALWAYS_INLINE addBatchSinglePlace(
+        size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
+        const override
+    {
+        /// When the whole column goes into a single place without an -If filter, there is no need
+        /// to walk the rows: the Dynamic column can report all of its types directly.
+        const bool whole_column = if_argument_pos < 0 && row_begin == 0 && row_end == columns[0]->size();
+        if (whole_column)
+            assert_cast<const ColumnDynamic & >(*columns[0]).getAllTypeNamesInto(data(place).data);
+        else
+            IAggregateFunctionDataHelper::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
+    }
+
+    void addManyDefaults(
+        AggregateDataPtr __restrict /*place*/,
+        const IColumn ** /*columns*/,
+        size_t /*length*/,
+        Arena * /*arena*/) const override
+    {
+        /// The default value of Dynamic is NULL, which has no type to record.
+    }
+
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
+    {
+        data(place).merge(data(rhs));
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
+    {
+        data(place).serialize(buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
+    {
+        data(place).deserialize(buf);
+    }
+
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
+    {
+        data(place).insertResultInto(to);
+    }
+};
+
+/// Factory callback for distinctDynamicTypes.
+/// Rejects parameters, requires exactly one argument and requires that argument to be of type Dynamic.
+AggregateFunctionPtr createAggregateFunctionDistinctDynamicTypes(
+    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
+{
+    assertNoParameters(name, parameters);
+
+    const size_t arguments_count = argument_types.size();
+    if (arguments_count != 1)
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                        "Incorrect number of arguments for aggregate function {}. Expected single argument with type Dynamic, got {} arguments", name, arguments_count);
+
+    const auto & argument_type = argument_types[0];
+    if (!isDynamic(argument_type))
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}. Expected type Dynamic", argument_type->getName(), name);
+
+    return std::make_shared<AggregateFunctionDistinctDynamicTypes>(argument_types);
+}
+
+/// Registers the creator above in `factory` under the name "distinctDynamicTypes".
+void registerAggregateFunctionDistinctDynamicTypes(AggregateFunctionFactory & factory)
+{
+    factory.registerFunction("distinctDynamicTypes", createAggregateFunctionDistinctDynamicTypes);
+}
+
+}
--- a/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp
+++ b/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp
@ -0,0 +1,350 @@
+#include <unordered_set>
+#include <IO/WriteHelpers.h>
+#include <IO/ReadHelpers.h>
+
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeMap.h>
+#include <DataTypes/DataTypeObject.h>
+#include <DataTypes/DataTypesBinaryEncoding.h>
+#include <Columns/ColumnDynamic.h>
+#include <Columns/ColumnObject.h>
+#include <Columns/ColumnMap.h>
+
+#include <AggregateFunctions/IAggregateFunction.h>
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/FactoryHelpers.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int TOO_LARGE_ARRAY_SIZE;
+}
+
+constexpr static size_t DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE = 0xFFFFFF;
+
+
+/// Aggregation state of distinctJSONPaths: the set of paths seen in a JSON (Object) column.
+struct AggregateFunctionDistinctJSONPathsData
+{
+    static constexpr auto name = "distinctJSONPaths";
+
+    std::unordered_set<String> data;
+
+    /// Adds the paths present in row `row_num` of `column`.
+    /// The last parameter (type names of typed paths) is not needed here; it exists so that this
+    /// state has the same interface as AggregateFunctionDistinctJSONPathsAndTypesData.
+    void add(const ColumnObject & column, size_t row_num, const std::unordered_map<String, String> &)
+    {
+        /// Typed paths are added unconditionally, without looking at the row.
+        for (const auto & [path, _] : column.getTypedPaths())
+            data.insert(path);
+        for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs())
+        {
+            /// Add path from dynamic paths only if it's not NULL in this row.
+            if (!dynamic_column->isNullAt(row_num))
+                data.insert(path);
+        }
+
+        /// Iterate over paths in shared data in this row.
+        /// NOTE(review): for row 0 the start offset is read at index -1; this relies on the offsets
+        /// container allowing that access - confirm.
+        const auto [shared_data_paths, _] = column.getSharedDataPathsAndValues();
+        const auto & shared_data_offsets = column.getSharedDataOffsets();
+        const size_t start = shared_data_offsets[static_cast<ssize_t>(row_num) - 1];
+        const size_t end = shared_data_offsets[static_cast<ssize_t>(row_num)];
+        for (size_t i = start; i != end; ++i)
+            data.insert(shared_data_paths->getDataAt(i).toString());
+    }
+
+    /// Adds the paths of all rows of `column` at once, without iterating row by row.
+    void addWholeColumn(const ColumnObject & column, const std::unordered_map<String, String> &)
+    {
+        for (const auto & [path, _] : column.getTypedPaths())
+            data.insert(path);
+        for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs())
+        {
+            /// Add dynamic path only if it has at least one non-null value.
+            /// getNumberOfDefaultRows for Dynamic column is O(1).
+            if (dynamic_column->getNumberOfDefaultRows() != dynamic_column->size())
+                data.insert(path);
+        }
+
+        /// Iterate over all paths in shared data.
+        const auto [shared_data_paths, _] = column.getSharedDataPathsAndValues();
+        for (size_t i = 0; i != shared_data_paths->size(); ++i)
+            data.insert(shared_data_paths->getDataAt(i).toString());
+    }
+
+    /// Unions the paths of `other` into this state.
+    void merge(const AggregateFunctionDistinctJSONPathsData & other)
+    {
+        data.insert(other.data.begin(), other.data.end());
+    }
+
+    /// Writes the element count as a varint followed by every path as a binary string.
+    void serialize(WriteBuffer & buf) const
+    {
+        writeVarUInt(data.size(), buf);
+        for (const auto & path : data)
+            writeStringBinary(path, buf);
+    }
+
+    /// Reads what serialize() wrote. Throws TOO_LARGE_ARRAY_SIZE if the announced count
+    /// exceeds DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE.
+    void deserialize(ReadBuffer & buf)
+    {
+        size_t size;
+        readVarUInt(size, buf);
+        if (size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE)
+            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, size);
+
+        /// The size is already validated, so reserve up front to avoid rehashing while inserting
+        /// (the other distinct* aggregation states do the same).
+        data.reserve(size);
+        String path;
+        for (size_t i = 0; i != size; ++i)
+        {
+            readStringBinary(path, buf);
+            data.insert(path);
+        }
+    }
+
+    /// Appends one array row with all collected paths to `column`, which must be an array of strings.
+    void insertResultInto(IColumn & column)
+    {
+        /// Insert paths in sorted order for better output.
+        auto & array_column = assert_cast<ColumnArray &>(column);
+        auto & string_column = assert_cast<ColumnString &>(array_column.getData());
+        std::vector<String> sorted_data(data.begin(), data.end());
+        std::sort(sorted_data.begin(), sorted_data.end());
+        for (const auto & path : sorted_data)
+            string_column.insertData(path.data(), path.size());
+        /// Close the array row: its end offset is the total number of nested strings.
+        array_column.getOffsets().push_back(string_column.size());
+    }
+
+    /// Result type of the function: Array(String).
+    static DataTypePtr getResultType()
+    {
+        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
+    }
+};
+
+/// Aggregation state of distinctJSONPathsAndTypes: for every path seen in a JSON (Object) column,
+/// the set of type names observed for that path.
+struct AggregateFunctionDistinctJSONPathsAndTypesData
+{
+    static constexpr auto name = "distinctJSONPathsAndTypes";
+
+    std::unordered_map<String, std::unordered_set<String>> data;
+
+    /// Adds the (path, type) pairs present in row `row_num` of `column`.
+    /// `typed_paths_type_names` maps each typed path to its type name; it must contain every typed
+    /// path of the column, otherwise `.at()` throws.
+    void add(const ColumnObject & column, size_t row_num, const std::unordered_map<String, String> & typed_paths_type_names)
+    {
+        /// Typed paths are added unconditionally, without looking at the row.
+        for (const auto & [path, _] : column.getTypedPaths())
+            data[path].insert(typed_paths_type_names.at(path));
+        for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs())
+        {
+            /// Add the dynamic path only if it is not NULL in this row.
+            if (!dynamic_column->isNullAt(row_num))
+                data[path].insert(dynamic_column->getTypeNameAt(row_num));
+        }
+
+        /// Iterate over paths in shared data in this row and decode the data types.
+        /// NOTE(review): for row 0 the start offset is read at index -1; this relies on the offsets
+        /// container allowing that access - confirm.
+        const auto [shared_data_paths, shared_data_values] = column.getSharedDataPathsAndValues();
+        const auto & shared_data_offsets = column.getSharedDataOffsets();
+        const size_t start = shared_data_offsets[static_cast<ssize_t>(row_num) - 1];
+        const size_t end = shared_data_offsets[static_cast<ssize_t>(row_num)];
+        for (size_t i = start; i != end; ++i)
+        {
+            auto path = shared_data_paths->getDataAt(i).toString();
+            auto value = shared_data_values->getDataAt(i);
+            /// The binary value starts with the encoded data type; only the type is decoded here.
+            ReadBufferFromMemory buf(value.data, value.size);
+            auto type = decodeDataType(buf);
+            /// We should not have Nulls here but let's check just in case.
+            chassert(!isNothing(type));
+            data[path].insert(type->getName());
+        }
+    }
+
+    /// Adds the (path, type) pairs of all rows of `column` at once, without iterating row by row.
+    void addWholeColumn(const ColumnObject & column, const std::unordered_map<String, String> & typed_paths_type_names)
+    {
+        for (const auto & [path, _] : column.getTypedPaths())
+            data[path].insert(typed_paths_type_names.at(path));
+        for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs())
+        {
+            /// Add dynamic path only if it has at least one non-null value.
+            /// getNumberOfDefaultRows for Dynamic column is O(1).
+            if (dynamic_column->getNumberOfDefaultRows() != dynamic_column->size())
+                dynamic_column->getAllTypeNamesInto(data[path]);
+        }
+
+        /// Iterate over all paths in shared data and decode the data types.
+        const auto [shared_data_paths, shared_data_values] = column.getSharedDataPathsAndValues();
+        for (size_t i = 0; i != shared_data_paths->size(); ++i)
+        {
+            auto path = shared_data_paths->getDataAt(i).toString();
+            auto value = shared_data_values->getDataAt(i);
+            ReadBufferFromMemory buf(value.data, value.size);
+            auto type = decodeDataType(buf);
+            /// We should not have Nulls here but let's check just in case.
+            chassert(!isNothing(type));
+            data[path].insert(type->getName());
+        }
+    }
+
+    /// Unions the per-path type sets of `other` into this state.
+    void merge(const AggregateFunctionDistinctJSONPathsAndTypesData & other)
+    {
+        for (const auto & [path, types] : other.data)
+            data[path].insert(types.begin(), types.end());
+    }
+
+    /// Format: varint number of paths, then for each path: the path as a binary string,
+    /// varint number of types, and every type name as a binary string.
+    void serialize(WriteBuffer & buf) const
+    {
+        writeVarUInt(data.size(), buf);
+        for (const auto & [path, types] : data)
+        {
+            writeStringBinary(path, buf);
+            writeVarUInt(types.size(), buf);
+            for (const auto & type : types)
+                writeStringBinary(type, buf);
+        }
+    }
+
+    /// Reads what serialize() wrote. Throws TOO_LARGE_ARRAY_SIZE if the number of paths or the
+    /// number of types of any path exceeds DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE.
+    void deserialize(ReadBuffer & buf)
+    {
+        size_t paths_size, types_size;
+        readVarUInt(paths_size, buf);
+        if (paths_size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE)
+            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size for paths (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, paths_size);
+
+        data.reserve(paths_size);
+        String path, type;
+        for (size_t i = 0; i != paths_size; ++i)
+        {
+            readStringBinary(path, buf);
+            readVarUInt(types_size, buf);
+            if (types_size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE)
+                throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size for types (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, types_size);
+
+            /// Look the path up once instead of once per type. The reference stays valid because
+            /// nothing is inserted into `data` inside the inner loop.
+            auto & types = data[path];
+            types.reserve(types_size);
+            for (size_t j = 0; j != types_size; ++j)
+            {
+                readStringBinary(type, buf);
+                types.insert(type);
+            }
+        }
+    }
+
+    /// Appends one map row (path -> sorted array of type names) to `column`, which must be a ColumnMap
+    /// with String keys and Array(String) values.
+    void insertResultInto(IColumn & column)
+    {
+        /// Insert sorted paths and types for better output.
+        auto & array_column = assert_cast<ColumnMap &>(column).getNestedColumn();
+        auto & tuple_column = assert_cast<ColumnTuple &>(array_column.getData());
+        auto & key_column = assert_cast<ColumnString &>(tuple_column.getColumn(0));
+        auto & value_column = assert_cast<ColumnArray &>(tuple_column.getColumn(1));
+        auto & value_column_data = assert_cast<ColumnString &>(value_column.getData());
+        std::vector<std::pair<String, std::vector<String>>> sorted_data;
+        sorted_data.reserve(data.size());
+        for (const auto & [path, types] : data)
+        {
+            std::vector<String> sorted_types(types.begin(), types.end());
+            std::sort(sorted_types.begin(), sorted_types.end());
+            sorted_data.emplace_back(path, std::move(sorted_types));
+        }
+        /// Paths are unique, so this orders the pairs by path.
+        std::sort(sorted_data.begin(), sorted_data.end());
+
+        for (const auto & [path, types] : sorted_data)
+        {
+            key_column.insertData(path.data(), path.size());
+            for (const auto & type : types)
+                value_column_data.insertData(type.data(), type.size());
+            /// Close the array of types for this path.
+            value_column.getOffsets().push_back(value_column_data.size());
+        }
+
+        /// Close the map row: its end offset is the total number of keys.
+        array_column.getOffsets().push_back(key_column.size());
+    }
+
+    /// Result type of the function: Map(String, Array(String)).
+    static DataTypePtr getResultType()
+    {
+        return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()));
+    }
+};
+
+/// Aggregate function over a JSON column. Depending on `Data` it computes either the distinct
+/// paths or, for every path, the distinct type names. All state handling is delegated to `Data`.
+template <typename Data>
+class AggregateFunctionDistinctJSONPathsAndTypes final : public IAggregateFunctionDataHelper<Data, AggregateFunctionDistinctJSONPathsAndTypes<Data>>
+{
+public:
+    explicit AggregateFunctionDistinctJSONPathsAndTypes(const DataTypes & argument_types_)
+        : IAggregateFunctionDataHelper<Data, AggregateFunctionDistinctJSONPathsAndTypes<Data>>(
+            argument_types_, {}, Data::getResultType())
+    {
+        /// Cache the type name of every typed path once, so the add methods do not have to build it again.
+        const auto & object_type = assert_cast<const DataTypeObject &>(*argument_types_[0]);
+        const auto & declared_paths = object_type.getTypedPaths();
+        typed_paths_type_names.reserve(declared_paths.size());
+        for (const auto & [path, type] : declared_paths)
+            typed_paths_type_names[path] = type->getName();
+    }
+
+    String getName() const override
+    {
+        return Data::name;
+    }
+
+    bool allocatesMemoryInArena() const override
+    {
+        return false;
+    }
+
+    /// Adds a single row of the JSON column to the state.
+    void ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
+    {
+        const auto & source = assert_cast<const ColumnObject & >(*columns[0]);
+        this->data(place).add(source, row_num, typed_paths_type_names);
+    }
+
+    /// Adds rows [row_begin, row_end) into one state.
+    void ALWAYS_INLINE addBatchSinglePlace(
+        size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
+        const override
+    {
+        /// When the whole column goes into a single place without an -If filter, there is no need
+        /// to walk the rows: paths and types can be collected from the column as a whole.
+        const bool whole_column = if_argument_pos < 0 && row_begin == 0 && row_end == columns[0]->size();
+        if (whole_column)
+            this->data(place).addWholeColumn(assert_cast<const ColumnObject & >(*columns[0]), typed_paths_type_names);
+        else
+            IAggregateFunctionDataHelper<Data, AggregateFunctionDistinctJSONPathsAndTypes<Data>>::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
+    }
+
+    void addManyDefaults(
+        AggregateDataPtr __restrict /*place*/,
+        const IColumn ** /*columns*/,
+        size_t /*length*/,
+        Arena * /*arena*/) const override
+    {
+        /// The default JSON value is an empty object, which has no paths to record.
+    }
+
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
+    {
+        this->data(place).merge(this->data(rhs));
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
+    {
+        this->data(place).serialize(buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
+    {
+        this->data(place).deserialize(buf);
+    }
+
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
+    {
+        this->data(place).insertResultInto(to);
+    }
+
+private:
+    /// Typed path -> name of its data type, filled once in the constructor.
+    std::unordered_map<String, String> typed_paths_type_names;
+};
+
+/// Factory callback shared by distinctJSONPaths and distinctJSONPathsAndTypes; `Data` selects the state.
+/// Rejects parameters, requires exactly one argument and requires that argument to be of type JSON.
+template <typename Data>
+AggregateFunctionPtr createAggregateFunctionDistinctJSONPathsAndTypes(
+    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
+{
+    assertNoParameters(name, parameters);
+
+    const size_t arguments_count = argument_types.size();
+    if (arguments_count != 1)
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                        "Incorrect number of arguments for aggregate function {}. Expected single argument with type JSON, got {} arguments", name, arguments_count);
+
+    const auto & argument_type = argument_types[0];
+    if (!isObject(argument_type))
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}. Expected type JSON", argument_type->getName(), name);
+
+    return std::make_shared<AggregateFunctionDistinctJSONPathsAndTypes<Data>>(argument_types);
+}
+
+/// Registers both JSON aggregate functions in `factory`: "distinctJSONPaths" (paths only)
+/// and "distinctJSONPathsAndTypes" (paths with their type names). They share one creator template
+/// and differ only in the state type passed to it.
+void registerAggregateFunctionDistinctJSONPathsAndTypes(AggregateFunctionFactory & factory)
+{
+    factory.registerFunction("distinctJSONPaths", createAggregateFunctionDistinctJSONPathsAndTypes<AggregateFunctionDistinctJSONPathsData>);
+    factory.registerFunction("distinctJSONPathsAndTypes", createAggregateFunctionDistinctJSONPathsAndTypes<AggregateFunctionDistinctJSONPathsAndTypesData>);
+}
+
+}
--- a/src/AggregateFunctions/registerAggregateFunctions.cpp
+++ b/src/AggregateFunctions/registerAggregateFunctions.cpp
@ -89,6 +89,8 @@ void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
 void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &);
 void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory);
 void registerAggregateFunctionLargestTriangleThreeBuckets(AggregateFunctionFactory & factory);
+void registerAggregateFunctionDistinctDynamicTypes(AggregateFunctionFactory & factory);
+void registerAggregateFunctionDistinctJSONPathsAndTypes(AggregateFunctionFactory & factory);

 class AggregateFunctionCombinatorFactory;
 void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -191,6 +193,8 @@ void registerAggregateFunctions()
        registerAggregateFunctionFlameGraph(factory);
        registerAggregateFunctionKolmogorovSmirnovTest(factory);
        registerAggregateFunctionLargestTriangleThreeBuckets(factory);
+        registerAggregateFunctionDistinctDynamicTypes(factory);
+        registerAggregateFunctionDistinctJSONPathsAndTypes(factory);

        registerWindowFunctions(factory);
    }
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@ -34,6 +34,7 @@
 #include <Parsers/Access/ASTCreateUserQuery.h>
 #include <Parsers/Access/ASTAuthenticationData.h>
 #include <Parsers/ASTDropQuery.h>
+#include <Parsers/ASTExplainQuery.h>
 #include <Parsers/ASTSelectQuery.h>
 #include <Parsers/ASTSetQuery.h>
 #include <Parsers/ASTUseQuery.h>
@ -2111,6 +2112,15 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
    // - Other formats (e.g. FORMAT CSV) are arbitrarily more complex and tricky to parse. For example, we may be unable to distinguish if the semicolon
    //   is part of the data or ends the statement. In this case, we simply assume that the end of the INSERT statement is determined by \n\n (two newlines).
    auto * insert_ast = parsed_query->as<ASTInsertQuery>();
+    // We also consider the INSERT query in EXPLAIN queries (same as normal INSERT queries)
+    if (!insert_ast)
+    {
+        auto * explain_ast = parsed_query->as<ASTExplainQuery>();
+        if (explain_ast && explain_ast->getExplainedQuery())
+        {
+            insert_ast = explain_ast->getExplainedQuery()->as<ASTInsertQuery>();
+        }
+    }
    const char * query_to_execute_end = this_query_end;
    if (insert_ast && insert_ast->data)
    {
@ -2689,14 +2699,6 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name)
    ReadBufferFromFile in(file_name);
    readStringUntilEOF(queries_from_file, in);

-    if (!getClientConfiguration().has("log_comment"))
-    {
-        Settings settings = client_context->getSettingsCopy();
-        /// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]"
-        settings.log_comment = fs::absolute(fs::path(file_name));
-        client_context->setSettings(settings);
-    }
-
    return executeMultiQuery(queries_from_file);
 }

--- a/src/Columns/ColumnDynamic.cpp
+++ b/src/Columns/ColumnDynamic.cpp
@ -979,6 +979,41 @@ ColumnPtr ColumnDynamic::compress() const
        });
 }

+/// Returns the name of the data type of the value stored in row `row_num`.
+/// A NULL row yields an empty string.
+String ColumnDynamic::getTypeNameAt(size_t row_num) const
+{
+    const auto & variants = getVariantColumn();
+    const size_t discriminator = variants.globalDiscriminatorAt(row_num);
+
+    if (discriminator == ColumnVariant::NULL_DISCRIMINATOR)
+        return "";
+
+    /// Regular variant: its name is known from the variant info.
+    if (discriminator != getSharedVariantDiscriminator())
+        return variant_info.variant_names[discriminator];
+
+    /// Shared variant: the value is stored in binary form that starts with the encoded data type.
+    const auto encoded_value = getSharedVariant().getDataAt(variants.offsetAt(row_num));
+    ReadBufferFromMemory type_buf(encoded_value.data, encoded_value.size);
+    return decodeDataType(type_buf)->getName();
+}
+
+/// Inserts into `names` the type name of every value stored in this column:
+/// the names of all non-empty regular variants plus the types decoded from the shared variant.
+void ColumnDynamic::getAllTypeNamesInto(std::unordered_set<String> & names) const
+{
+    const auto shared_discriminator = getSharedVariantDiscriminator();
+    const size_t variants_count = variant_info.variant_names.size();
+    for (size_t discriminator = 0; discriminator != variants_count; ++discriminator)
+    {
+        /// The shared variant is handled separately below.
+        if (discriminator == shared_discriminator)
+            continue;
+
+        /// A variant without rows contributes no type.
+        if (variant_column_ptr->getVariantByGlobalDiscriminator(discriminator).empty())
+            continue;
+
+        names.insert(variant_info.variant_names[discriminator]);
+    }
+
+    /// Every value of the shared variant is stored in binary form that starts with its encoded data type.
+    const auto & shared_values = getSharedVariant();
+    const size_t shared_count = shared_values.size();
+    for (size_t row = 0; row != shared_count; ++row)
+    {
+        const auto encoded_value = shared_values.getDataAt(row);
+        ReadBufferFromMemory type_buf(encoded_value.data, encoded_value.size);
+        names.insert(decodeDataType(type_buf)->getName());
+    }
+}
+
 void ColumnDynamic::prepareForSquashing(const Columns & source_columns)
 {
    if (source_columns.empty())
--- a/src/Columns/ColumnDynamic.h
+++ b/src/Columns/ColumnDynamic.h
@ -430,6 +430,9 @@ public:

    const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type) { return getVariantSerialization(variant_type, variant_type->getName()); }

+    String getTypeNameAt(size_t row_num) const;
+    void getAllTypeNamesInto(std::unordered_set<String> & names) const;
+
 private:
    void createVariantInfo(const DataTypePtr & variant_type);

--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@ -75,9 +75,9 @@
    M(GlobalThread, "Number of threads in global thread pool.") \
    M(GlobalThreadActive, "Number of threads in global thread pool running a task.") \
    M(GlobalThreadScheduled, "Number of queued or active jobs in global thread pool.") \
-    M(LocalThread, "Number of threads in local thread pools. The threads in local thread pools are taken from the global thread pool.") \
-    M(LocalThreadActive, "Number of threads in local thread pools running a task.") \
-    M(LocalThreadScheduled, "Number of queued or active jobs in local thread pools.") \
+    M(LocalThread, "Obsolete. Number of threads in local thread pools. The threads in local thread pools are taken from the global thread pool.") \
+    M(LocalThreadActive, "Obsolete. Number of threads in local thread pools running a task.") \
+    M(LocalThreadScheduled, "Obsolete. Number of queued or active jobs in local thread pools.") \
    M(MergeTreeDataSelectExecutorThreads, "Number of threads in the MergeTreeDataSelectExecutor thread pool.") \
    M(MergeTreeDataSelectExecutorThreadsActive, "Number of threads in the MergeTreeDataSelectExecutor thread pool running a task.") \
    M(MergeTreeDataSelectExecutorThreadsScheduled, "Number of queued or active jobs in the MergeTreeDataSelectExecutor thread pool.") \
@ -292,6 +292,9 @@
    M(DistrCacheWriteRequests, "Number of executed Write requests to Distributed Cache") \
    M(DistrCacheServerConnections, "Number of open connections to ClickHouse server from Distributed Cache") \
    \
+    M(SchedulerIOReadScheduled, "Number of IO reads are being scheduled currently") \
+    M(SchedulerIOWriteScheduled, "Number of IO writes are being scheduled currently") \
+    \
    M(StorageConnectionsStored, "Total count of sessions stored in the session pool for storages") \
    M(StorageConnectionsTotal, "Total count of all sessions: stored in the pool and actively used right now for storages") \
    \
--- a/src/Common/CurrentThread.cpp
+++ b/src/Common/CurrentThread.cpp
@ -113,6 +113,56 @@ std::string_view CurrentThread::getQueryId()
    return current_thread->getQueryId();
 }

+/// Stores `link` as the read resource link of the current thread.
+/// Does nothing when `current_thread` is not set; throws LOGICAL_ERROR if a read link is already stored.
+void CurrentThread::attachReadResource(ResourceLink link)
+{
+    if (unlikely(!current_thread))
+        return;
+
+    auto & slot = current_thread->read_resource_link;
+    if (slot)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Thread #{} has been already attached to read resource", std::to_string(getThreadId()));
+
+    slot = link;
+}
+
+/// Resets the read resource link of the current thread.
+/// Does nothing when `current_thread` is not set; throws LOGICAL_ERROR if no read link is stored.
+void CurrentThread::detachReadResource()
+{
+    if (unlikely(!current_thread))
+        return;
+
+    auto & slot = current_thread->read_resource_link;
+    if (!slot)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Thread #{} has not been attached to read resource", std::to_string(getThreadId()));
+
+    slot.reset();
+}
+
+/// Returns the read resource link of the current thread, or a default-constructed link
+/// when `current_thread` is not set.
+ResourceLink CurrentThread::getReadResourceLink()
+{
+    return unlikely(!current_thread) ? ResourceLink{} : current_thread->read_resource_link;
+}
+
+/// Stores `link` as the write resource link of the current thread.
+/// Does nothing when `current_thread` is not set; throws LOGICAL_ERROR if a write link is already stored.
+void CurrentThread::attachWriteResource(ResourceLink link)
+{
+    if (unlikely(!current_thread))
+        return;
+
+    auto & slot = current_thread->write_resource_link;
+    if (slot)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Thread #{} has been already attached to write resource", std::to_string(getThreadId()));
+
+    slot = link;
+}
+
+/// Resets the write resource link of the current thread.
+/// Does nothing when `current_thread` is not set; throws LOGICAL_ERROR if no write link is stored.
+void CurrentThread::detachWriteResource()
+{
+    if (unlikely(!current_thread))
+        return;
+
+    auto & slot = current_thread->write_resource_link;
+    if (!slot)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Thread #{} has not been attached to write resource", std::to_string(getThreadId()));
+
+    slot.reset();
+}
+
+/// Returns the write resource link of the current thread, or a default-constructed link
+/// when `current_thread` is not set.
+ResourceLink CurrentThread::getWriteResourceLink()
+{
+    return unlikely(!current_thread) ? ResourceLink{} : current_thread->write_resource_link;
+}
+
 MemoryTracker * CurrentThread::getUserMemoryTracker()
 {
    if (unlikely(!current_thread))
--- a/src/Common/CurrentThread.h
+++ b/src/Common/CurrentThread.h
@ -2,6 +2,7 @@

 #include <Interpreters/Context_fwd.h>
 #include <Common/ThreadStatus.h>
+#include <Common/Scheduler/ResourceLink.h>

 #include <memory>
 #include <string>
@ -23,7 +24,6 @@ class QueryStatus;
 struct Progress;
 class InternalTextLogsQueue;

-
 /** Collection of static methods to work with thread-local objects.
  * Allows to attach and detach query/process (thread group) to a thread
  * (to calculate query-related metrics and to allow to obtain query-related data from a thread).
@ -92,6 +92,14 @@ public:

    static std::string_view getQueryId();

+    // For IO Scheduling: per-thread read and write resource links.
+    // All six functions are no-ops (getters return a default-constructed link) when there is no current thread.
+    // attach* throws LOGICAL_ERROR if a link of that kind is already attached;
+    // detach* throws LOGICAL_ERROR if no link of that kind is attached.
+    static void attachReadResource(ResourceLink link);
+    static void detachReadResource();
+    static ResourceLink getReadResourceLink();
+    static void attachWriteResource(ResourceLink link);
+    static void detachWriteResource();
+    static ResourceLink getWriteResourceLink();
+
    /// Initializes query with current thread as master thread in constructor, and detaches it in destructor
    struct QueryScope : private boost::noncopyable
    {
@ -102,6 +110,39 @@ public:
        void logPeakMemoryUsage();
        bool log_peak_memory_usage_in_destructor = true;
    };
+
+    /// Scoped attach/detach of IO resource links to the current thread.
+    /// Empty links are skipped; the destructor detaches only what the constructor attached.
+    struct IOScope : private boost::noncopyable
+    {
+        /// Attaches every non-empty link. If attaching the write link throws, the read link
+        /// attached by this constructor is detached again before the exception propagates.
+        explicit IOScope(ResourceLink read_resource_link, ResourceLink write_resource_link)
+        {
+            if (read_resource_link)
+            {
+                attachReadResource(read_resource_link);
+                read_attached = true;
+            }
+            if (write_resource_link)
+            {
+                try
+                {
+                    attachWriteResource(write_resource_link);
+                }
+                catch (...)
+                {
+                    /// The destructor is not invoked for an object whose constructor throws,
+                    /// so the read attachment made above has to be rolled back here.
+                    /// NOTE(review): assumes detachReadResource() mirrors detachWriteResource() and does not throw right after a successful attach.
+                    if (read_attached)
+                        detachReadResource();
+                    throw;
+                }
+                write_attached = true;
+            }
+        }
+
+        /// Same as above, taking both links from `settings`.
+        explicit IOScope(const IOSchedulingSettings & settings)
+            : IOScope(settings.read_resource_link, settings.write_resource_link)
+        {}
+
+        ~IOScope()
+        {
+            if (read_attached)
+                detachReadResource();
+            if (write_attached)
+                detachWriteResource();
+        }
+
+        bool read_attached = false;  /// Set once the constructor has attached the read link
+        bool write_attached = false; /// Set once the constructor has attached the write link
+    };
 };

 }
--- a/src/Common/HTTPConnectionPool.cpp
+++ b/src/Common/HTTPConnectionPool.cpp
@ -2,6 +2,7 @@
 #include <Common/HostResolvePool.h>

 #include <Common/ProfileEvents.h>
+#include <Common/Stopwatch.h>
 #include <Common/CurrentMetrics.h>
 #include <Common/logger_useful.h>
 #include <Common/Exception.h>
@ -9,6 +10,7 @@
 #include <Common/ProxyConfiguration.h>
 #include <Common/MemoryTrackerSwitcher.h>
 #include <Common/SipHash.h>
+#include <Common/Scheduler/ResourceGuard.h>
 #include <Common/proxyConfigurationToPocoProxyConfig.h>

 #include <Poco/Net/HTTPChunkedStream.h>
@ -236,6 +238,59 @@ public:
 };


+// Session data hooks implementation for integration with resource scheduler.
+// Hooks are created per every request-response pair and are registered/unregistered in HTTP session.
+// * `atStart()` sends a resource request to the scheduler every time the HTTP session is going to send or receive
+//   data to/from the socket, and then waits for the scheduler confirmation. This way the scheduler might
+//   throttle and/or schedule socket data streams.
+// * `atFinish()` hook is called on successful socket read/write operation.
+//   It informs the scheduler that operation is complete, which allows the scheduler to control the total
+//   amount of in-flight bytes and/or operations.
+// * `atFail()` hook is called on failure of socket operation. The purpose is to correct the amount of bytes
+//   passed through the scheduler queue to ensure fair bandwidth allocation even in presence of errors.
+struct ResourceGuardSessionDataHooks : public Poco::Net::IHTTPSessionDataHooks
+{
+    /// `link_` must be non-empty. `metrics` is stored in the owned resource request.
+    /// `method` and `uri` are only used to describe the HTTP request in the slow-request log message.
+    ResourceGuardSessionDataHooks(ResourceLink link_, const ResourceGuard::Metrics * metrics, LoggerPtr log_, const String & method, const String & uri)
+        : link(link_)
+        , log(log_)
+        , http_request(method + " " + uri)
+    {
+        chassert(link);
+        request.metrics = metrics;
+    }
+
+    /// The owned request must be in the finished state by the time the hooks are destroyed.
+    ~ResourceGuardSessionDataHooks() override
+    {
+        request.assertFinished(); // Never destruct with an active request
+    }
+
+    /// Enqueues a resource request for `bytes` and blocks until it is dequeued; logs waits of 5 seconds or more.
+    void atStart(int bytes) override
+    {
+        constexpr auto slow_wait_threshold_ms = 5000;
+
+        Stopwatch stopwatch;
+        request.enqueue(bytes, link);
+        request.wait();
+        stopwatch.stop();
+
+        const auto waited_ms = stopwatch.elapsedMilliseconds();
+        if (waited_ms >= slow_wait_threshold_ms)
+            LOG_INFO(log, "Resource request took too long to finish: {} ms for {}", waited_ms, http_request);
+    }
+
+    /// Finishes the request, reporting `bytes` as the real cost.
+    void atFinish(int bytes) override
+    {
+        request.finish(bytes, link);
+    }
+
+    /// Finishes the request with zero real cost.
+    void atFail() override
+    {
+        request.finish(0, link);
+    }
+
+    ResourceLink link;
+    ResourceGuard::Request request;
+    LoggerPtr log;
+    String http_request;
+};
+
+
 // EndpointConnectionPool manage connections to the endpoint
 // Features:
 // - it uses HostResolver for address selecting. See Common/HostResolver.h for more info.
@ -246,8 +301,6 @@ public:
 // - `Session::reconnect()` uses the pool as well
 // - comprehensive sensors
 // - session is reused according its inner state, automatically
-
-
 template <class Session>
 class EndpointConnectionPool : public std::enable_shared_from_this<EndpointConnectionPool<Session>>, public IExtendedPool
 {
@ -337,6 +390,13 @@ private:
        std::ostream & sendRequest(Poco::Net::HTTPRequest & request) override
        {
            auto idle = idleTime();
+
+            // Set data hooks for IO scheduling
+            if (ResourceLink link = CurrentThread::getReadResourceLink())
+                Session::setReceiveDataHooks(std::make_shared<ResourceGuardSessionDataHooks>(link, ResourceGuard::Metrics::getIORead(), log, request.getMethod(), request.getURI()));
+            if (ResourceLink link = CurrentThread::getWriteResourceLink())
+                Session::setSendDataHooks(std::make_shared<ResourceGuardSessionDataHooks>(link, ResourceGuard::Metrics::getIOWrite(), log, request.getMethod(), request.getURI()));
+
            std::ostream & result = Session::sendRequest(request);
            result.exceptions(std::ios::badbit);

@ -393,6 +453,8 @@ private:
                }
            }
            response_stream = nullptr;
+            Session::setSendDataHooks();
+            Session::setReceiveDataHooks();

            group->atConnectionDestroy();

--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@ -86,6 +86,20 @@
    M(NetworkReceiveBytes, "Total number of bytes received from network. Only ClickHouse-related network interaction is included, not by 3rd party libraries.") \
    M(NetworkSendBytes, "Total number of bytes send to network. Only ClickHouse-related network interaction is included, not by 3rd party libraries.") \
    \
+    M(GlobalThreadPoolExpansions, "Counts the total number of times new threads have been added to the global thread pool. This metric indicates the frequency of expansions in the global thread pool to accommodate increased processing demands.") \
+    M(GlobalThreadPoolShrinks, "Counts the total number of times the global thread pool has shrunk by removing threads. This occurs when the number of idle threads exceeds max_thread_pool_free_size, indicating adjustments in the global thread pool size in response to decreased thread utilization.") \
+    M(GlobalThreadPoolThreadCreationMicroseconds, "Total time spent waiting for new threads to start.") \
+    M(GlobalThreadPoolLockWaitMicroseconds, "Total time threads have spent waiting for locks in the global thread pool.") \
+    M(GlobalThreadPoolJobs, "Counts the number of jobs that have been pushed to the global thread pool.") \
+    M(GlobalThreadPoolJobWaitTimeMicroseconds, "Measures the elapsed time from when a job is scheduled in the thread pool to when it is picked up for execution by a worker thread. This metric helps identify delays in job processing, indicating the responsiveness of the thread pool to new tasks.") \
+    M(LocalThreadPoolExpansions, "Counts the total number of times threads have been borrowed from the global thread pool to expand local thread pools.") \
+    M(LocalThreadPoolShrinks, "Counts the total number of times threads have been returned to the global thread pool from local thread pools.") \
+    M(LocalThreadPoolThreadCreationMicroseconds, "Total time local thread pools have spent waiting to borrow a thread from the global pool.") \
+    M(LocalThreadPoolLockWaitMicroseconds, "Total time threads have spent waiting for locks in the local thread pools.") \
+    M(LocalThreadPoolJobs, "Counts the number of jobs that have been pushed to the local thread pools.") \
+    M(LocalThreadPoolBusyMicroseconds, "Total time threads have spent executing the actual work.") \
+    M(LocalThreadPoolJobWaitTimeMicroseconds, "Measures the elapsed time from when a job is scheduled in the thread pool to when it is picked up for execution by a worker thread. This metric helps identify delays in job processing, indicating the responsiveness of the thread pool to new tasks.") \
+    \
    M(DiskS3GetRequestThrottlerCount, "Number of DiskS3 GET and SELECT requests passed through throttler.") \
    M(DiskS3GetRequestThrottlerSleepMicroseconds, "Total time a query was sleeping to conform DiskS3 GET and SELECT request throttling.") \
    M(DiskS3PutRequestThrottlerCount, "Number of DiskS3 PUT, COPY, POST and LIST requests passed through throttler.") \
@ -106,6 +120,13 @@
    M(PartsWithAppliedMutationsOnFly, "Total number of parts for which there was any mutation applied on fly") \
    M(MutationsAppliedOnFlyInAllParts, "The sum of number of applied mutations on-fly for part among all read parts") \
    \
+    M(SchedulerIOReadRequests, "Resource requests passed through scheduler for IO reads.") \
+    M(SchedulerIOReadBytes, "Bytes passed through scheduler for IO reads.") \
+    M(SchedulerIOReadWaitMicroseconds, "Total time a query was waiting on resource requests for IO reads.") \
+    M(SchedulerIOWriteRequests, "Resource requests passed through scheduler for IO writes.") \
+    M(SchedulerIOWriteBytes, "Bytes passed through scheduler for IO writes.") \
+    M(SchedulerIOWriteWaitMicroseconds, "Total time a query was waiting on resource requests for IO writes.") \
+    \
    M(QueryMaskingRulesMatch, "Number of times query masking rules was successfully matched.") \
    \
    M(ReplicatedPartFetches, "Number of times a data part was downloaded from replica of a ReplicatedMergeTree table.") \
--- a/src/Common/ProgressIndication.cpp
+++ b/src/Common/ProgressIndication.cpp
@ -34,13 +34,16 @@ bool ProgressIndication::updateProgress(const Progress & value)

 void ProgressIndication::resetProgress()
 {
-    watch.restart();
-    progress.reset();
-    show_progress_bar = false;
-    written_progress_chars = 0;
-    write_progress_on_update = false;
+    {
+        std::lock_guard lock(progress_mutex);
+        progress.reset();
+        show_progress_bar = false;
+        written_progress_chars = 0;
+        write_progress_on_update = false;
+    }
    {
        std::lock_guard lock(profile_events_mutex);
+        watch.restart();
        cpu_usage_meter.reset(getElapsedNanoseconds());
        hosts_data.clear();
    }
@ -90,6 +93,8 @@ ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const

 void ProgressIndication::writeFinalProgress()
 {
+    std::lock_guard lock(progress_mutex);
+
    if (progress.read_rows < 1000)
        return;

@ -271,6 +276,8 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)

 void ProgressIndication::clearProgressOutput(WriteBufferFromFileDescriptor & message)
 {
+    std::lock_guard lock(progress_mutex);
+
    if (written_progress_chars)
    {
        written_progress_chars = 0;
--- a/src/Common/ProgressIndication.h
+++ b/src/Common/ProgressIndication.h
@ -115,6 +115,8 @@ private:
    /// It is possible concurrent access to the following:
    /// - writeProgress() (class properties) (guarded with progress_mutex)
    /// - hosts_data/cpu_usage_meter (guarded with profile_events_mutex)
+    ///
+    /// It is also possible to have more races if query is cancelled, so that clearProgressOutput() is called concurrently
    mutable std::mutex profile_events_mutex;
    mutable std::mutex progress_mutex;

--- a/src/Common/Scheduler/ISchedulerQueue.h
+++ b/src/Common/Scheduler/ISchedulerQueue.h
@ -22,10 +22,13 @@ public:
    {}

    // Wrapper for `enqueueRequest()` that should be used to account for available resource budget
-    void enqueueRequestUsingBudget(ResourceRequest * request)
+    // Returns the original (estimated) `request->cost`, captured before the budget overwrites it; pass it later to `adjustBudget()`
+    [[ nodiscard ]] ResourceCost enqueueRequestUsingBudget(ResourceRequest * request)
    {
-        request->cost = budget.ask(request->cost);
+        ResourceCost estimated_cost = request->cost; // remember the caller's estimate before `request->cost` is replaced below
+        request->cost = budget.ask(estimated_cost); // the request is enqueued with whatever cost the budget returns
        enqueueRequest(request);
+        return estimated_cost;
    }

    // Should be called to account for difference between real and estimated costs
@ -34,18 +37,6 @@ public:
        budget.adjust(estimated_cost, real_cost);
    }

-    // Adjust budget to account for extra consumption of `cost` resource units
-    void consumeBudget(ResourceCost cost)
-    {
-        adjustBudget(0, cost);
-    }
-
-    // Adjust budget to account for requested, but not consumed `cost` resource units
-    void accumulateBudget(ResourceCost cost)
-    {
-        adjustBudget(cost, 0);
-    }
-
    /// Enqueue new request to be executed using underlying resource.
    /// Should be called outside of scheduling subsystem, implementation must be thread-safe.
    virtual void enqueueRequest(ResourceRequest * request) = 0;
--- a/src/Common/Scheduler/Nodes/tests/ResourceTest.h
+++ b/src/Common/Scheduler/Nodes/tests/ResourceTest.h
@ -232,12 +232,13 @@ struct ResourceTestManager : public ResourceTestBase
        ResourceTestManager & t;

        Guard(ResourceTestManager & t_, ResourceLink link_, ResourceCost cost)
-            : ResourceGuard(link_, cost, PostponeLocking)
+            : ResourceGuard(ResourceGuard::Metrics::getIOWrite(), link_, cost, Lock::Defer)
            , t(t_)
        {
            t.onEnqueue(link);
            lock();
            t.onExecute(link);
+            consume(cost);
        }
    };

@ -310,8 +311,9 @@ struct ResourceTestManager : public ResourceTestBase
    // NOTE: actually leader's request(s) make their own small busy period.
    void blockResource(ResourceLink link)
    {
-        ResourceGuard g(link, 1, ResourceGuard::PostponeLocking);
+        ResourceGuard g(ResourceGuard::Metrics::getIOWrite(), link, 1, ResourceGuard::Lock::Defer);
        g.lock();
+        g.consume(1);
        // NOTE: at this point we assume resource to be blocked by single request (<max_requests>1</max_requests>)
        busy_period.arrive_and_wait(); // (1) notify all followers that resource is blocked
        busy_period.arrive_and_wait(); // (2) wait all followers to enqueue their requests
@ -320,10 +322,11 @@ struct ResourceTestManager : public ResourceTestBase
    {
        getLinkData(link).left += total_requests + 1;
        busy_period.arrive_and_wait(); // (1) wait leader to block resource
-        ResourceGuard g(link, cost, ResourceGuard::PostponeLocking);
+        ResourceGuard g(ResourceGuard::Metrics::getIOWrite(), link, cost, ResourceGuard::Lock::Defer);
        onEnqueue(link);
        busy_period.arrive_and_wait(); // (2) notify leader to unblock
        g.lock();
+        g.consume(cost);
        onExecute(link);
    }
 };
--- a/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp
+++ b/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp
@ -36,11 +36,16 @@ TEST(SchedulerDynamicResourceManager, Smoke)

    for (int i = 0; i < 10; i++)
    {
-        ResourceGuard gA(cA->get("res1"), ResourceGuard::PostponeLocking);
+        ResourceGuard gA(ResourceGuard::Metrics::getIOWrite(), cA->get("res1"), 1, ResourceGuard::Lock::Defer);
        gA.lock();
+        gA.consume(1);
        gA.unlock();

-        ResourceGuard gB(cB->get("res1"));
+        ResourceGuard gB(ResourceGuard::Metrics::getIOWrite(), cB->get("res1"));
+        gB.unlock();
+
+        // Report the real cost on the guard that is still active: `gB` was already unlocked above,
+        // so consuming on it would have no effect.
+        ResourceGuard gC(ResourceGuard::Metrics::getIORead(), cB->get("res1"));
+        gC.consume(2);
    }
 }

--- a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp
+++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp
@ -1,11 +1,13 @@
 #include <gtest/gtest.h>

-#include <Common/Scheduler/SchedulerRoot.h>
-
 #include <Common/Scheduler/Nodes/tests/ResourceTest.h>

+#include <Common/Scheduler/SchedulerRoot.h>
+#include <Common/randomSeed.h>
+
 #include <barrier>
 #include <future>
+#include <pcg_random.hpp>

 using namespace DB;

@ -22,6 +24,17 @@ struct ResourceTest : public ResourceTestBase
    {
        scheduler.stop(true);
    }
+
+    std::mutex rng_mutex;
+    pcg64 rng{randomSeed()};
+
+    /// Draws an integer from `std::uniform_int_distribution<T>(from, to)`.
+    /// Access to the shared generator `rng` is serialized with `rng_mutex`.
+    template <typename T>
+    T randomInt(T from, T to)
+    {
+        using Distribution = std::uniform_int_distribution<T>;
+        Distribution dist(from, to);
+
+        std::lock_guard guard(rng_mutex);
+        return dist(rng);
+    }
 };

 struct ResourceHolder
@ -109,26 +122,55 @@ TEST(SchedulerRoot, Smoke)
    r2.registerResource();

    {
-        ResourceGuard rg(a);
+        ResourceGuard rg(ResourceGuard::Metrics::getIOWrite(), a);
        EXPECT_TRUE(fc1->requests.contains(&rg.request));
+        rg.consume(1);
    }

    {
-        ResourceGuard rg(b);
+        ResourceGuard rg(ResourceGuard::Metrics::getIOWrite(), b);
        EXPECT_TRUE(fc1->requests.contains(&rg.request));
+        rg.consume(1);
    }

    {
-        ResourceGuard rg(c);
+        ResourceGuard rg(ResourceGuard::Metrics::getIOWrite(), c);
        EXPECT_TRUE(fc2->requests.contains(&rg.request));
+        rg.consume(1);
    }

    {
-        ResourceGuard rg(d);
+        ResourceGuard rg(ResourceGuard::Metrics::getIOWrite(), d);
        EXPECT_TRUE(fc2->requests.contains(&rg.request));
+        rg.consume(1);
    }
 }

+/// Issues requests with a random estimated cost that then report a random real cost.
+/// In the end the sum of real costs must equal the dequeued cost minus the remaining queue budget.
+TEST(SchedulerRoot, Budget)
+{
+    ResourceTest t;
+
+    ResourceHolder r1(t);
+    r1.add<ConstraintTest>("/", "<max_requests>1</max_requests>");
+    r1.add<PriorityPolicy>("/prio");
+    auto a = r1.addQueue("/prio/A", "");
+    r1.registerResource();
+
+    const int total_requests = 10;
+    ResourceCost total_real_cost = 0;
+
+    int issued = 0;
+    while (issued < total_requests)
+    {
+        // Keep the draw order: estimated cost first, real cost second
+        const ResourceCost est_cost = t.randomInt(1, 10);
+        const ResourceCost real_cost = t.randomInt(0, 10);
+        total_real_cost += real_cost;
+
+        ResourceGuard rg(ResourceGuard::Metrics::getIOWrite(), a, est_cost);
+        rg.consume(real_cost);
+        ++issued;
+    }
+
+    EXPECT_EQ(total_requests, a.queue->dequeued_requests);
+    EXPECT_EQ(total_real_cost, a.queue->dequeued_cost - a.queue->getBudget());
+}
+
 TEST(SchedulerRoot, Cancel)
 {
    ResourceTest t;
--- a/src/Common/Scheduler/ResouceLink.cpp
+++ b/src/Common/Scheduler/ResouceLink.cpp
@ -1,25 +0,0 @@
-#include <Common/Scheduler/ISchedulerQueue.h>
-#include <Common/Scheduler/ResourceLink.h>
-#include <Common/Scheduler/ResourceRequest.h>
-
-namespace DB
-{
-void ResourceLink::adjust(ResourceCost estimated_cost, ResourceCost real_cost) const
-{
-    if (queue)
-        queue->adjustBudget(estimated_cost, real_cost);
-}
-
-void ResourceLink::consumed(ResourceCost cost) const
-{
-    if (queue)
-        queue->consumeBudget(cost);
-}
-
-void ResourceLink::accumulate(DB::ResourceCost cost) const
-{
-    if (queue)
-        queue->accumulateBudget(cost);
-}
-}
-
--- a/src/Common/Scheduler/ResourceGuard.h
+++ b/src/Common/Scheduler/ResourceGuard.h
@ -7,10 +7,30 @@
 #include <Common/Scheduler/ResourceRequest.h>
 #include <Common/Scheduler/ResourceLink.h>

+#include <Common/CurrentThread.h>
+#include <Common/ProfileEvents.h>
+#include <Common/CurrentMetrics.h>
+
 #include <condition_variable>
 #include <mutex>


+namespace ProfileEvents
+{
+    extern const Event SchedulerIOReadRequests;
+    extern const Event SchedulerIOReadBytes;
+    extern const Event SchedulerIOReadWaitMicroseconds;
+    extern const Event SchedulerIOWriteRequests;
+    extern const Event SchedulerIOWriteBytes;
+    extern const Event SchedulerIOWriteWaitMicroseconds;
+}
+
+namespace CurrentMetrics
+{
+    extern const Metric SchedulerIOReadScheduled;
+    extern const Metric SchedulerIOWriteScheduled;
+}
+
 namespace DB
 {

@ -22,12 +42,42 @@ namespace DB
 class ResourceGuard
 {
 public:
-    enum ResourceGuardCtor
+    enum class Lock
    {
-        LockStraightAway, /// Locks inside constructor (default)
+        Default, /// Locks inside constructor

        // WARNING: Only for tests. It is not exception-safe because `lock()` must be called after construction.
-        PostponeLocking /// Don't lock in constructor, but send request
+        Defer /// Don't lock in constructor, but send request
+    };
+
+    /// Set of profile events and a current metric that resource requests of one kind report to.
+    /// Every field defaults to the corresponding `end()` value.
+    struct Metrics
+    {
+        const ProfileEvents::Event requests = ProfileEvents::end();
+        const ProfileEvents::Event cost = ProfileEvents::end();
+        const ProfileEvents::Event wait_microseconds = ProfileEvents::end();
+        const CurrentMetrics::Metric scheduled_count = CurrentMetrics::end();
+
+        /// Shared instance bound to the `SchedulerIORead*` events and metric.
+        static const Metrics * getIORead()
+        {
+            static const Metrics io_read{
+                .requests = ProfileEvents::SchedulerIOReadRequests,
+                .cost = ProfileEvents::SchedulerIOReadBytes,
+                .wait_microseconds = ProfileEvents::SchedulerIOReadWaitMicroseconds,
+                .scheduled_count = CurrentMetrics::SchedulerIOReadScheduled
+            };
+            return &io_read;
+        }
+
+        /// Shared instance bound to the `SchedulerIOWrite*` events and metric.
+        static const Metrics * getIOWrite()
+        {
+            static const Metrics io_write{
+                .requests = ProfileEvents::SchedulerIOWriteRequests,
+                .cost = ProfileEvents::SchedulerIOWriteBytes,
+                .wait_microseconds = ProfileEvents::SchedulerIOWriteWaitMicroseconds,
+                .scheduled_count = CurrentMetrics::SchedulerIOWriteScheduled
+            };
+            return &io_write;
+        }
+    };

    enum RequestState
@ -46,60 +96,74 @@ public:
            chassert(state == Finished);
            state = Enqueued;
            ResourceRequest::reset(cost_);
-            link_.queue->enqueueRequestUsingBudget(this);
+            estimated_cost = link_.queue->enqueueRequestUsingBudget(this); // NOTE: it modifies `cost` and enqueues request
        }

        // This function is executed inside scheduler thread and wakes thread issued this `request`.
        // That thread will continue execution and do real consumption of requested resource synchronously.
        void execute() override
        {
-            {
-                std::unique_lock lock(mutex);
-                chassert(state == Enqueued);
-                state = Dequeued;
-            }
+            std::unique_lock lock(mutex);
+            chassert(state == Enqueued);
+            state = Dequeued;
            dequeued_cv.notify_one();
        }

        void wait()
        {
+            CurrentMetrics::Increment scheduled(metrics->scheduled_count); // `metrics` is dereferenced here, so it must be set before calling wait()
+            auto timer = CurrentThread::getProfileEvents().timer(metrics->wait_microseconds); // presumably accounts the time spent in this scope to `wait_microseconds` - confirm against the timer implementation
            std::unique_lock lock(mutex);
            dequeued_cv.wait(lock, [this] { return state == Dequeued; }); // blocks until `execute()` switches the state to `Dequeued` and notifies
        }

-        void finish()
+        void finish(ResourceCost real_cost_, ResourceLink link_) // `link_.queue` is dereferenced below without a check, so `link_` must be non-empty
        {
            // lock(mutex) is not required because `Dequeued` request cannot be used by the scheduler thread
            chassert(state == Dequeued);
            state = Finished;
+            if (estimated_cost != real_cost_) // the estimate stored by `enqueue()` differs from what was really consumed
+                link_.queue->adjustBudget(estimated_cost, real_cost_); // let the queue budget account for the difference
            ResourceRequest::finish();
+            ProfileEvents::increment(metrics->requests); // one finished request
+            ProfileEvents::increment(metrics->cost, real_cost_); // accounted with the real cost, not the estimate
        }

-        static Request & local()
+        void assertFinished()
+        {
+            // lock(mutex) is not required because `Finished` request cannot be used by the scheduler thread
+            chassert(state == Finished);
+        }
+
+        static Request & local(const Metrics * metrics)
        {
            // Since single thread cannot use more than one resource request simultaneously,
            // we can reuse thread-local request to avoid allocations
            static thread_local Request instance;
+            instance.metrics = metrics;
            return instance;
        }

+        const Metrics * metrics = nullptr; // Must be initialized before use
+
    private:
+        ResourceCost estimated_cost = 0; // Stores initial `cost` value in case budget was used to modify it
        std::mutex mutex;
        std::condition_variable dequeued_cv;
        RequestState state = Finished;
    };

-    /// Creates pending request for resource; blocks while resource is not available (unless `PostponeLocking`)
-    explicit ResourceGuard(ResourceLink link_, ResourceCost cost = 1, ResourceGuardCtor ctor = LockStraightAway)
+    /// Creates pending request for resource; blocks while resource is not available (unless `Lock::Defer`)
+    explicit ResourceGuard(const Metrics * metrics, ResourceLink link_, ResourceCost cost = 1, ResourceGuard::Lock type = ResourceGuard::Lock::Default)
        : link(link_)
-        , request(Request::local())
+        , request(Request::local(metrics))
    {
        if (cost == 0)
-            link.queue = nullptr; // Ignore zero-cost requests
-        else if (link.queue)
+            link.reset(); // Ignore zero-cost requests
+        else if (link)
        {
            request.enqueue(cost, link);
-            if (ctor == LockStraightAway)
+            if (type == Lock::Default)
                request.wait();
        }
    }
@ -112,22 +176,29 @@ public:
    /// Blocks until resource is available
    void lock()
    {
-        if (link.queue)
+        if (link)
            request.wait();
    }

-    /// Report resource consumption has finished
-    void unlock()
+    void consume(ResourceCost cost)
    {
-        if (link.queue)
+        real_cost += cost;
+    }
+
+    /// Report resource consumption has finished
+    void unlock(ResourceCost consumed = 0)
+    {
+        consume(consumed);
+        if (link)
        {
-            request.finish();
-            link.queue = nullptr;
+            request.finish(real_cost, link);
+            link.reset();
        }
    }

    ResourceLink link;
    Request & request;
+    ResourceCost real_cost = 0;
 };

 }
--- a/src/Common/Scheduler/ResourceLink.h
+++ b/src/Common/Scheduler/ResourceLink.h
@ -13,13 +13,28 @@ using ResourceCost = Int64;
 struct ResourceLink
 {
    ISchedulerQueue * queue = nullptr;
+
    bool operator==(const ResourceLink &) const = default;
+    explicit operator bool() const { return queue != nullptr; }

-    void adjust(ResourceCost estimated_cost, ResourceCost real_cost) const;
+    void reset()
+    {
+        queue = nullptr;
+    }
+};

-    void consumed(ResourceCost cost) const;
+/*
+ * Everything required for IO scheduling.
+ * Note that raw pointers are stored inside, so make sure that the `ClassifierPtr` that produced
+ * the resource links outlives them. Usually the classifier is stored in the query `Context`.
+ */
+struct IOSchedulingSettings
+{
+    ResourceLink read_resource_link;
+    ResourceLink write_resource_link;

-    void accumulate(ResourceCost cost) const;
+    bool operator==(const IOSchedulingSettings &) const = default;
+    explicit operator bool() const { return read_resource_link && write_resource_link; }
 };

 }
--- a/src/Common/Scheduler/ResourceRequest.h
+++ b/src/Common/Scheduler/ResourceRequest.h
@ -45,7 +45,7 @@ constexpr ResourceCost ResourceCostMax = std::numeric_limits<int>::max();
 class ResourceRequest : public boost::intrusive::list_base_hook<>
 {
 public:
-    /// Cost of request execution; should be filled before request enqueueing.
+    /// Cost of request execution; should be filled before request enqueueing and remain constant until `finish()`.
    /// NOTE: If cost is not known in advance, ResourceBudget should be used (note that every ISchedulerQueue has it)
    ResourceCost cost;

--- a/src/Common/ThreadPool.cpp
+++ b/src/Common/ThreadPool.cpp
@ -1,4 +1,5 @@
 #include <Common/ThreadPool.h>
+#include <Common/ProfileEvents.h>
 #include <Common/setThreadName.h>
 #include <Common/Exception.h>
 #include <Common/getNumberOfPhysicalCPUCores.h>
@ -27,6 +28,25 @@ namespace CurrentMetrics
    extern const Metric GlobalThreadScheduled;
 }

+namespace ProfileEvents
+{
+    extern const Event GlobalThreadPoolExpansions;
+    extern const Event GlobalThreadPoolShrinks;
+    extern const Event GlobalThreadPoolThreadCreationMicroseconds;
+    extern const Event GlobalThreadPoolLockWaitMicroseconds;
+    extern const Event GlobalThreadPoolJobs;
+    extern const Event GlobalThreadPoolJobWaitTimeMicroseconds;
+
+    extern const Event LocalThreadPoolExpansions;
+    extern const Event LocalThreadPoolShrinks;
+    extern const Event LocalThreadPoolThreadCreationMicroseconds;
+    extern const Event LocalThreadPoolLockWaitMicroseconds;
+    extern const Event LocalThreadPoolJobs;
+    extern const Event LocalThreadPoolBusyMicroseconds;
+    extern const Event LocalThreadPoolJobWaitTimeMicroseconds;
+
+}
+
 class JobWithPriority
 {
 public:
@ -40,6 +60,7 @@ public:
    /// Call stacks of all jobs' schedulings leading to this one
    std::vector<StackTrace::FramePointers> frame_pointers;
    bool enable_job_stack_trace = false;
+    Stopwatch job_create_time;

    JobWithPriority(
        Job job_, Priority priority_, CurrentMetrics::Metric metric,
@ -59,6 +80,13 @@ public:
    {
        return priority > rhs.priority; // Reversed for `priority_queue` max-heap to yield minimum value (i.e. highest priority) first
    }
+
+    UInt64 elapsedMicroseconds() const // Microseconds reported by this job's `job_create_time` stopwatch
+    {
+        return job_create_time.elapsedMicroseconds();
+    }
+
+
 };

 static constexpr auto DEFAULT_THREAD_NAME = "ThreadPool";
@ -180,14 +208,18 @@ ReturnType ThreadPoolImpl<Thread>::scheduleImpl(Job job, Priority priority, std:
    };

    {
+        Stopwatch watch;
        std::unique_lock lock(mutex);
+        ProfileEvents::increment(
+            std::is_same_v<Thread, std::thread> ? ProfileEvents::GlobalThreadPoolLockWaitMicroseconds : ProfileEvents::LocalThreadPoolLockWaitMicroseconds,
+            watch.elapsedMicroseconds());

        if (CannotAllocateThreadFaultInjector::injectFault())
            return on_error("fault injected");

        auto pred = [this] { return !queue_size || scheduled_jobs < queue_size || shutdown; };

-        if (wait_microseconds)  /// Check for optional. Condition is true if the optional is set and the value is zero.
+        if (wait_microseconds)  /// Check for optional. Condition is true if the optional is set. Even if the value is zero.
        {
            if (!job_finished.wait_for(lock, std::chrono::microseconds(*wait_microseconds), pred))
                return on_error(fmt::format("no free thread (timeout={})", *wait_microseconds));
@ -216,7 +248,13 @@ ReturnType ThreadPoolImpl<Thread>::scheduleImpl(Job job, Priority priority, std:

            try
            {
+                Stopwatch watch2;
                threads.front() = Thread([this, it = threads.begin()] { worker(it); });
+                ProfileEvents::increment(
+                    std::is_same_v<Thread, std::thread> ? ProfileEvents::GlobalThreadPoolThreadCreationMicroseconds : ProfileEvents::LocalThreadPoolThreadCreationMicroseconds,
+                    watch2.elapsedMicroseconds());
+                ProfileEvents::increment(
+                    std::is_same_v<Thread, std::thread> ? ProfileEvents::GlobalThreadPoolExpansions : ProfileEvents::LocalThreadPoolExpansions);
            }
            catch (...)
            {
@ -239,6 +277,8 @@ ReturnType ThreadPoolImpl<Thread>::scheduleImpl(Job job, Priority priority, std:
    /// Wake up a free thread to run the new job.
    new_job_or_shutdown.notify_one();

+    ProfileEvents::increment(std::is_same_v<Thread, std::thread> ? ProfileEvents::GlobalThreadPoolJobs : ProfileEvents::LocalThreadPoolJobs);
+
    return static_cast<ReturnType>(true);
 }

@ -262,7 +302,14 @@ void ThreadPoolImpl<Thread>::startNewThreadsNoLock()

        try
        {
+            Stopwatch watch;
            threads.front() = Thread([this, it = threads.begin()] { worker(it); });
+            ProfileEvents::increment(
+                std::is_same_v<Thread, std::thread> ? ProfileEvents::GlobalThreadPoolThreadCreationMicroseconds : ProfileEvents::LocalThreadPoolThreadCreationMicroseconds,
+                watch.elapsedMicroseconds());
+            ProfileEvents::increment(
+                std::is_same_v<Thread, std::thread> ? ProfileEvents::GlobalThreadPoolExpansions : ProfileEvents::LocalThreadPoolExpansions);
+
        }
        catch (...)
        {
@ -293,7 +340,11 @@ void ThreadPoolImpl<Thread>::scheduleOrThrow(Job job, Priority priority, uint64_
 template <typename Thread>
 void ThreadPoolImpl<Thread>::wait()
 {
+    Stopwatch watch;
    std::unique_lock lock(mutex);
+    ProfileEvents::increment(
+        std::is_same_v<Thread, std::thread> ? ProfileEvents::GlobalThreadPoolLockWaitMicroseconds : ProfileEvents::LocalThreadPoolLockWaitMicroseconds,
+        watch.elapsedMicroseconds());
    /// Signal here just in case.
    /// If threads are waiting on condition variables, but there are some jobs in the queue
    /// then it will prevent us from deadlock.
@ -334,7 +385,11 @@ void ThreadPoolImpl<Thread>::finalize()

    /// Wait for all currently running jobs to finish (we don't wait for all scheduled jobs here like the function wait() does).
    for (auto & thread : threads)
+    {
        thread.join();
+        ProfileEvents::increment(
+            std::is_same_v<Thread, std::thread> ? ProfileEvents::GlobalThreadPoolShrinks : ProfileEvents::LocalThreadPoolShrinks);
+    }

    threads.clear();
 }
@ -391,7 +446,11 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
        std::optional<JobWithPriority> job_data;

        {
+            Stopwatch watch;
            std::unique_lock lock(mutex);
+            ProfileEvents::increment(
+                std::is_same_v<Thread, std::thread> ? ProfileEvents::GlobalThreadPoolLockWaitMicroseconds : ProfileEvents::LocalThreadPoolLockWaitMicroseconds,
+                watch.elapsedMicroseconds());

            // Finish with previous job if any
            if (job_is_done)
@ -424,6 +483,8 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
                {
                    thread_it->detach();
                    threads.erase(thread_it);
+                    ProfileEvents::increment(
+                        std::is_same_v<Thread, std::thread> ? ProfileEvents::GlobalThreadPoolShrinks : ProfileEvents::LocalThreadPoolShrinks);
                }
                return;
            }
@ -433,6 +494,10 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
            job_data = std::move(const_cast<JobWithPriority &>(jobs.top()));
            jobs.pop();

+            ProfileEvents::increment(
+                std::is_same_v<Thread, std::thread> ? ProfileEvents::GlobalThreadPoolJobWaitTimeMicroseconds : ProfileEvents::LocalThreadPoolJobWaitTimeMicroseconds,
+                job_data->elapsedMicroseconds());
+
            /// We don't run jobs after `shutdown` is set, but we have to properly dequeue all jobs and finish them.
            if (shutdown)
            {
@ -459,7 +524,22 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_

            CurrentMetrics::Increment metric_active_pool_threads(metric_active_threads);

-            job_data->job();
+            if constexpr (!std::is_same_v<Thread, std::thread>)
+            {
+                Stopwatch watch;
+                job_data->job();
+                // This metric is less relevant for the global thread pool, as it would show large values (time while
+                // a thread was used by local pools) and increment only when local pools are destroyed.
+                //
+                // In cases where global pool threads are used directly (without a local thread pool), distinguishing
+                // them is difficult.
+                ProfileEvents::increment(ProfileEvents::LocalThreadPoolBusyMicroseconds, watch.elapsedMicroseconds());
+            }
+            else
+            {
+                job_data->job();
+            }
+

            if (thread_trace_context.root_span.isTraceEnabled())
            {
--- a/src/Common/ThreadPool.h
+++ b/src/Common/ThreadPool.h
@ -131,7 +131,7 @@ private:
    bool threads_remove_themselves = true;
    const bool shutdown_on_exception = true;

-    boost::heap::priority_queue<JobWithPriority> jobs;
+    boost::heap::priority_queue<JobWithPriority,boost::heap::stable<true>> jobs;
    std::list<Thread> threads;
    std::exception_ptr first_exception;
    std::stack<OnDestroyCallback> on_destroy_callbacks;
--- a/src/Common/ThreadStatus.h
+++ b/src/Common/ThreadStatus.h
@ -7,11 +7,11 @@
 #include <Common/MemoryTracker.h>
 #include <Common/ProfileEvents.h>
 #include <Common/Stopwatch.h>
+#include <Common/Scheduler/ResourceLink.h>

 #include <boost/noncopyable.hpp>

 #include <functional>
-#include <map>
 #include <memory>
 #include <mutex>
 #include <unordered_set>
@ -188,6 +188,10 @@ public:
    Progress progress_in;
    Progress progress_out;

+    /// IO scheduling
+    ResourceLink read_resource_link;
+    ResourceLink write_resource_link;
+
 private:
    /// Group of threads, to which this thread attached
    ThreadGroupPtr thread_group;
--- a/src/Common/tests/gtest_config_host_validation.cpp
+++ b/src/Common/tests/gtest_config_host_validation.cpp
@ -0,0 +1,69 @@
+#include <Poco/AutoPtr.h>
+#include <Poco/DOM/DOMParser.h>
+#include <Poco/Util/XMLConfiguration.h>
+
+#include <gtest/gtest.h>
+
+TEST(Common, ConfigHostValidation)
+{
+    std::string xml(R"CONFIG(<clickhouse>
+    <IPv4_1>0.0.0.0</IPv4_1>
+    <IPv4_2>192.168.0.1</IPv4_2>
+    <IPv4_3>127.0.0.1</IPv4_3>
+    <IPv4_4>255.255.255.255</IPv4_4>
+    <IPv6_1>2001:0db8:85a3:0000:0000:8a2e:0370:7334</IPv6_1>
+    <IPv6_2>2001:DB8::8a2e:370:7334</IPv6_2>
+    <IPv6_3>::1</IPv6_3>
+    <IPv6_4>::</IPv6_4>
+    <Domain_1>www.example.com.</Domain_1>
+    <Domain_2>a.co</Domain_2>
+    <Domain_3>localhost</Domain_3>
+    <Domain_4>xn--fiqs8s.xn--fiqz9s</Domain_4>
+    <IPv4_Invalid_1>192.168.1.256</IPv4_Invalid_1>
+    <IPv4_Invalid_2>192.168.1.1.1</IPv4_Invalid_2>
+    <IPv4_Invalid_3>192.168.1.99999999999999999999</IPv4_Invalid_3>
+    <IPv4_Invalid_4>192.168.1.a</IPv4_Invalid_4>
+    <IPv6_Invalid_1>2001:0db8:85a3:::8a2e:0370:7334</IPv6_Invalid_1>
+    <IPv6_Invalid_2>1200::AB00:1234::2552:7777:1313</IPv6_Invalid_2>
+    <IPv6_Invalid_3>1200::AB00:1234:Q000:2552:7777:1313</IPv6_Invalid_3>
+    <IPv6_Invalid_4>1200:AB00:1234:2552:7777:1313:FFFF</IPv6_Invalid_4>
+    <Domain_Invalid_1>example.com..</Domain_Invalid_1>
+    <Domain_Invalid_2>5example.com</Domain_Invalid_2>
+    <Domain_Invalid_3>example.com-</Domain_Invalid_3>
+    <Domain_Invalid_4>exa_mple.com</Domain_Invalid_4>
+</clickhouse>)CONFIG");
+
+    Poco::XML::DOMParser dom_parser;
+    Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
+    Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
+
+    EXPECT_NO_THROW(config->getHost("IPv4_1"));
+    EXPECT_NO_THROW(config->getHost("IPv4_2"));
+    EXPECT_NO_THROW(config->getHost("IPv4_3"));
+    EXPECT_NO_THROW(config->getHost("IPv4_4"));
+
+    EXPECT_NO_THROW(config->getHost("IPv6_1"));
+    EXPECT_NO_THROW(config->getHost("IPv6_2"));
+    EXPECT_NO_THROW(config->getHost("IPv6_3"));
+    EXPECT_NO_THROW(config->getHost("IPv6_4"));
+
+    EXPECT_NO_THROW(config->getHost("Domain_1"));
+    EXPECT_NO_THROW(config->getHost("Domain_2"));
+    EXPECT_NO_THROW(config->getHost("Domain_3"));
+    EXPECT_NO_THROW(config->getHost("Domain_4"));
+
+    EXPECT_THROW(config->getHost("IPv4_Invalid_1"), Poco::SyntaxException);
+    EXPECT_THROW(config->getHost("IPv4_Invalid_2"), Poco::SyntaxException);
+    EXPECT_THROW(config->getHost("IPv4_Invalid_3"), Poco::SyntaxException);
+    EXPECT_THROW(config->getHost("IPv4_Invalid_4"), Poco::SyntaxException);
+
+    EXPECT_THROW(config->getHost("IPv6_Invalid_1"), Poco::SyntaxException);
+    EXPECT_THROW(config->getHost("IPv6_Invalid_2"), Poco::SyntaxException);
+    EXPECT_THROW(config->getHost("IPv6_Invalid_3"), Poco::SyntaxException);
+    EXPECT_THROW(config->getHost("IPv6_Invalid_4"), Poco::SyntaxException);
+
+    EXPECT_THROW(config->getHost("Domain_Invalid_1"), Poco::SyntaxException);
+    EXPECT_THROW(config->getHost("Domain_Invalid_2"), Poco::SyntaxException);
+    EXPECT_THROW(config->getHost("Domain_Invalid_3"), Poco::SyntaxException);
+    EXPECT_THROW(config->getHost("Domain_Invalid_4"), Poco::SyntaxException);
+}
--- a/src/Core/ExternalTable.cpp
+++ b/src/Core/ExternalTable.cpp
@ -17,11 +17,12 @@

 #include <Core/ExternalTable.h>
 #include <Core/Settings.h>
-#include <Poco/Net/MessageHeader.h>
 #include <Parsers/ASTNameTypePair.h>
+#include <Parsers/IdentifierQuotingStyle.h>
 #include <Parsers/ParserCreateQuery.h>
 #include <Parsers/parseQuery.h>
 #include <base/scope_guard.h>
+#include <Poco/Net/MessageHeader.h>


 namespace DB
@ -85,7 +86,15 @@ void BaseExternalTable::parseStructureFromStructureField(const std::string & arg
        /// We use `formatWithPossiblyHidingSensitiveData` instead of `getColumnNameWithoutAlias` because `column->type` is an ASTFunction.
        /// `getColumnNameWithoutAlias` will return name of the function with `(arguments)` even if arguments is empty.
        if (column)
-            structure.emplace_back(column->name, column->type->formatWithPossiblyHidingSensitiveData(0, true, true, false));
+            structure.emplace_back(
+                column->name,
+                column->type->formatWithPossiblyHidingSensitiveData(
+                    /*max_length=*/0,
+                    /*one_line=*/true,
+                    /*show_secrets=*/true,
+                    /*print_pretty_type_names=*/false,
+                    /*always_quote_identifiers=*/false,
+                    /*identifier_quoting_style=*/IdentifierQuotingStyle::Backticks));
        else
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: expected column definition, got {}", child->formatForErrorMessage());
    }
@ -102,7 +111,15 @@ void BaseExternalTable::parseStructureFromTypesField(const std::string & argumen
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: {}", error);

    for (size_t i = 0; i < type_list_raw->children.size(); ++i)
-        structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(0, true, true, false));
+        structure.emplace_back(
+            "_" + toString(i + 1),
+            type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(
+                /*max_length=*/0,
+                /*one_line=*/true,
+                /*show_secrets=*/true,
+                /*print_pretty_type_names=*/false,
+                /*always_quote_identifiers=*/false,
+                /*identifier_quoting_style=*/IdentifierQuotingStyle::Backticks));
 }

 void BaseExternalTable::initSampleBlock()
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -1296,6 +1296,9 @@ class IColumn;
    M(Bool, precise_float_parsing, false, "Prefer more precise (but slower) float parsing algorithm", 0) \
    M(DateTimeOverflowBehavior, date_time_overflow_behavior, "ignore", "Overflow mode for Date, Date32, DateTime, DateTime64 types. Possible values: 'ignore', 'throw', 'saturate'.", 0) \
    M(Bool, validate_experimental_and_suspicious_types_inside_nested_types, true, "Validate usage of experimental and suspicious types inside nested types like Array/Map/Tuple", 0) \
+    \
+    M(Bool, output_format_always_quote_identifiers, false, "Always quote identifiers", 0) \
+    M(IdentifierQuotingStyle, output_format_identifier_quoting_style, IdentifierQuotingStyle::Backticks, "Set the quoting style for identifiers", 0) \


 // End of FORMAT_FACTORY_SETTINGS
--- a/src/Core/SettingsChangesHistory.cpp
+++ b/src/Core/SettingsChangesHistory.cpp
@ -75,6 +75,8 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
            {"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
            {"create_if_not_exists", false, false, "New setting."},
            {"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
+            {"output_format_always_quote_identifiers", false, false, "New setting."},
+            {"output_format_identifier_quoting_style", "Backticks", "Backticks", "New setting."}
        }
    },
    {"24.8",
--- a/src/Core/SettingsEnums.cpp
+++ b/src/Core/SettingsEnums.cpp
@ -244,4 +244,10 @@ IMPLEMENT_SETTING_ENUM(
    GroupArrayActionWhenLimitReached,
    ErrorCodes::BAD_ARGUMENTS,
    {{"throw", GroupArrayActionWhenLimitReached::THROW}, {"discard", GroupArrayActionWhenLimitReached::DISCARD}})
+
+IMPLEMENT_SETTING_ENUM(IdentifierQuotingStyle, ErrorCodes::BAD_ARGUMENTS,
+    {{"None", IdentifierQuotingStyle::None},
+     {"Backticks", IdentifierQuotingStyle::Backticks},
+     {"DoubleQuotes", IdentifierQuotingStyle::DoubleQuotes},
+     {"BackticksMySQL", IdentifierQuotingStyle::BackticksMySQL}})
 }
--- a/src/Core/SettingsEnums.h
+++ b/src/Core/SettingsEnums.h
@ -10,6 +10,7 @@
 #include <Formats/FormatSettings.h>
 #include <IO/ReadSettings.h>
 #include <Parsers/ASTSQLSecurity.h>
+#include <Parsers/IdentifierQuotingStyle.h>
 #include <QueryPipeline/SizeLimits.h>
 #include <Common/ShellCommandSettings.h>

@ -351,6 +352,8 @@ DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOverflowBehavior, FormatSettings::DateT

 DECLARE_SETTING_ENUM(SQLSecurityType)

+DECLARE_SETTING_ENUM(IdentifierQuotingStyle)
+
 enum class GroupArrayActionWhenLimitReached : uint8_t
 {
    THROW,
--- a/src/Core/SettingsFields.cpp
+++ b/src/Core/SettingsFields.cpp
@ -210,7 +210,7 @@ namespace
 {
    UInt64 stringToMaxThreads(const String & str)
    {
-        if (startsWith(str, "auto"))
+        if (startsWith(str, "auto") || startsWith(str, "'auto"))
            return 0;
        return parseFromString<UInt64>(str);
    }
@ -237,7 +237,8 @@ SettingFieldMaxThreads & SettingFieldMaxThreads::operator=(const Field & f)
 String SettingFieldMaxThreads::toString() const
 {
    if (is_auto)
-        return "auto(" + ::DB::toString(value) + ")";
+        /// Removing quotes here will introduce an incompatibility between replicas with different versions.
+        return "'auto(" + ::DB::toString(value) + ")'";
    else
        return ::DB::toString(value);
 }
--- a/src/DataTypes/DataTypeObject.cpp
+++ b/src/DataTypes/DataTypeObject.cpp
@ -519,10 +519,10 @@ static DataTypePtr createJSON(const ASTPtr & arguments)
    if (!context)
        context = Context::getGlobalContextInstance();

-    if (context->getSettingsRef().use_json_alias_for_old_object_type)
+    if (context->getSettingsRef().allow_experimental_object_type && context->getSettingsRef().use_json_alias_for_old_object_type)
    {
        if (arguments && !arguments->children.empty())
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Experimental Object type doesn't support any arguments. If you want to use new JSON type, set setting allow_experimental_json_type = 1");
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Experimental Object type doesn't support any arguments. If you want to use new JSON type, set settings allow_experimental_json_type = 1 and use_json_alias_for_old_object_type = 0");

        return std::make_shared<DataTypeObjectDeprecated>("JSON", false);
    }
--- a/src/Dictionaries/Embedded/RegionsNames.h
+++ b/src/Dictionaries/Embedded/RegionsNames.h
@ -35,9 +35,10 @@ class RegionsNames
    M(et, ru, 11) \
    M(pt, en, 12) \
    M(he, en, 13) \
-    M(vi, en, 14)
+    M(vi, en, 14) \
+    M(es, en, 15)

-    static constexpr size_t total_languages = 15;
+    static constexpr size_t total_languages = 16;

 public:
    enum class Language : size_t
--- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp
+++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp
@ -8,6 +8,7 @@
 #include <IO/ReadBufferFromString.h>
 #include <Common/logger_useful.h>
 #include <Common/Throttler.h>
+#include <Common/Scheduler/ResourceGuard.h>
 #include <base/sleep.h>
 #include <Common/ProfileEvents.h>
 #include <IO/SeekableReadBuffer.h>
@ -113,7 +114,9 @@ bool ReadBufferFromAzureBlobStorage::nextImpl()
    {
        try
        {
+            ResourceGuard rlock(ResourceGuard::Metrics::getIORead(), read_settings.io_scheduling.read_resource_link, to_read_bytes);
            bytes_read = data_stream->ReadToCount(reinterpret_cast<uint8_t *>(data_ptr), to_read_bytes);
+            rlock.unlock(bytes_read); // Do not hold resource under bandwidth throttler
            if (read_settings.remote_throttler)
                read_settings.remote_throttler->add(bytes_read, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds);
            break;
--- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp
+++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp
@ -101,15 +101,13 @@ void WriteBufferFromAzureBlobStorage::execWithRetry(std::function<void()> func,
    {
        try
        {
-            ResourceGuard rlock(write_settings.resource_link, cost); // Note that zero-cost requests are ignored
+            ResourceGuard rlock(ResourceGuard::Metrics::getIOWrite(), write_settings.io_scheduling.write_resource_link, cost); // Note that zero-cost requests are ignored
            func();
+            rlock.unlock(cost);
            break;
        }
        catch (const Azure::Core::RequestFailedException & e)
        {
-            if (cost)
-                write_settings.resource_link.accumulate(cost); // Accumulate resource for later use, because we have failed to consume it
-
            if (i == num_tries - 1 || !isRetryableAzureException(e))
                throw;

@ -117,8 +115,6 @@ void WriteBufferFromAzureBlobStorage::execWithRetry(std::function<void()> func,
        }
        catch (...)
        {
-            if (cost)
-                write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure
            throw;
        }
    }
--- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp
@ -461,14 +461,17 @@ DiskObjectStoragePtr DiskObjectStorage::createDiskObjectStorage()
 }

 template <class Settings>
-static inline Settings updateResourceLink(const Settings & settings, const String & resource_name)
+static inline Settings updateIOSchedulingSettings(const Settings & settings, const String & read_resource_name, const String & write_resource_name)
 {
-    if (resource_name.empty())
+    if (read_resource_name.empty() && write_resource_name.empty())
        return settings;
    if (auto query_context = CurrentThread::getQueryContext())
    {
        Settings result(settings);
-        result.resource_link = query_context->getWorkloadClassifier()->get(resource_name);
+        if (!read_resource_name.empty())
+            result.io_scheduling.read_resource_link = query_context->getWorkloadClassifier()->get(read_resource_name);
+        if (!write_resource_name.empty())
+            result.io_scheduling.write_resource_link = query_context->getWorkloadClassifier()->get(write_resource_name);
        return result;
    }
    return settings;
@ -500,7 +503,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskObjectStorage::readFile(

    return object_storage->readObjects(
        storage_objects,
-        updateResourceLink(settings, getReadResourceName()),
+        updateIOSchedulingSettings(settings, getReadResourceName(), getWriteResourceName()),
        read_hint,
        file_size);
 }
@ -513,7 +516,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskObjectStorage::writeFile(
 {
    LOG_TEST(log, "Write file: {}", path);

-    WriteSettings write_settings = updateResourceLink(settings, getWriteResourceName());
+    WriteSettings write_settings = updateIOSchedulingSettings(settings, getReadResourceName(), getWriteResourceName());
    auto transaction = createObjectStorageTransaction();
    return transaction->writeFile(path, buf_size, mode, write_settings);
 }
--- a/src/Formats/EscapingRuleUtils.cpp
+++ b/src/Formats/EscapingRuleUtils.cpp
@ -302,8 +302,12 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet
                /// Try to determine the type of value inside quotes
                auto type = tryInferDataTypeForSingleField(data, format_settings);

-                /// If we couldn't infer any type or it's a number and csv.try_infer_numbers_from_strings = 0, we determine it as a string.
-                if (!type || (format_settings.csv.try_infer_strings_from_quoted_tuples && isTuple(type)) || (!format_settings.csv.try_infer_numbers_from_strings && isNumber(type)))
+                /// Return String type if one of the following conditions apply
+                ///  - we couldn't infer any type
+                ///  - it's a number and csv.try_infer_numbers_from_strings = 0
+                ///  - it's a tuple and csv.try_infer_strings_from_quoted_tuples = 1
+                ///  - it's a Bool type (we don't allow reading bool values from strings)
+                if (!type || (format_settings.csv.try_infer_strings_from_quoted_tuples && isTuple(type)) || (!format_settings.csv.try_infer_numbers_from_strings && isNumber(type)) || isBool(type))
                    return std::make_shared<DataTypeString>();

                return type;
--- a/src/Functions/FunctionsConversion.cpp
+++ b/src/Functions/FunctionsConversion.cpp
@ -4134,6 +4134,29 @@ private:
        };
    }

+    /// Create wrapper only if we support this conversion.
+    WrapperType createWrapperIfCanConvert(const DataTypePtr & from, const DataTypePtr & to) const
+    {
+        try
+        {
+            /// We could avoid try/catch here if we implemented a check that one type can be cast to another,
+            /// but that requires quite a lot of work. For now, let's simply use try/catch.
+            /// First, check that we can create a wrapper.
+            WrapperType wrapper = prepareUnpackDictionaries(from, to);
+            /// Second, check that we can perform the conversion on a column with a single default value
+            /// (we cannot just check an empty column, as some checks are done only while iterating over rows).
+            auto test_col = from->createColumn();
+            test_col->insertDefault();
+            ColumnsWithTypeAndName column_from = {{test_col->getPtr(), from, "" }};
+            wrapper(column_from, to, nullptr, 1);
+            return wrapper;
+        }
+        catch (const Exception &)
+        {
+            return {};
+        }
+    }
+
    WrapperType createVariantToColumnWrapper(const DataTypeVariant & from_variant, const DataTypePtr & to_type) const
    {
        const auto & variant_types = from_variant.getVariants();
@ -4142,7 +4165,19 @@ private:

        /// Create conversion wrapper for each variant.
        for (const auto & variant_type : variant_types)
-            variant_wrappers.push_back(prepareUnpackDictionaries(variant_type, to_type));
+        {
+            WrapperType wrapper;
+            if (cast_type == CastType::accurateOrNull)
+            {
+                /// Create wrapper only if we support conversion from variant to the resulting type.
+                wrapper = createWrapperIfCanConvert(variant_type, to_type);
+            }
+            else
+            {
+                wrapper = prepareUnpackDictionaries(variant_type, to_type);
+            }
+            variant_wrappers.push_back(wrapper);
+        }

        return [variant_wrappers, variant_types, to_type]
               (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr
@ -4157,7 +4192,11 @@ private:
                auto variant_col = column_variant.getVariantPtrByGlobalDiscriminator(i);
                ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], "" }};
                const auto & variant_wrapper = variant_wrappers[i];
-                casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size()));
+                ColumnPtr casted_variant;
+                /// Check if we have wrapper for this variant.
+                if (variant_wrapper)
+                    casted_variant = variant_wrapper(variant, result_type, nullptr, variant_col->size());
+                casted_variant_columns.push_back(std::move(casted_variant));
            }

            /// Second, construct resulting column from casted variant columns according to discriminators.
@ -4167,7 +4206,7 @@ private:
            for (size_t i = 0; i != input_rows_count; ++i)
            {
                auto global_discr = column_variant.globalDiscriminatorByLocal(local_discriminators[i]);
-                if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
+                if (global_discr == ColumnVariant::NULL_DISCRIMINATOR || !casted_variant_columns[global_discr])
                    res->insertDefault();
                else
                    res->insertFrom(*casted_variant_columns[global_discr], column_variant.offsetAt(i));
@ -4357,10 +4396,27 @@ private:
            casted_variant_columns.reserve(variant_types.size());
            for (size_t i = 0; i != variant_types.size(); ++i)
            {
+                /// Skip shared variant, it will be processed later.
+                if (i == column_dynamic.getSharedVariantDiscriminator())
+                {
+                    casted_variant_columns.push_back(nullptr);
+                    continue;
+                }
+
                const auto & variant_col = variant_column.getVariantPtrByGlobalDiscriminator(i);
                ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], ""}};
-                auto variant_wrapper = prepareUnpackDictionaries(variant_types[i], result_type);
-                casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size()));
+                WrapperType variant_wrapper;
+                if (cast_type == CastType::accurateOrNull)
+                    /// Create wrapper only if we support conversion from variant to the resulting type.
+                    variant_wrapper = createWrapperIfCanConvert(variant_types[i], result_type);
+                else
+                    variant_wrapper = prepareUnpackDictionaries(variant_types[i], result_type);
+
+                ColumnPtr casted_variant;
+                /// Check if we have wrapper for this variant.
+                if (variant_wrapper)
+                    casted_variant = variant_wrapper(variant, result_type, nullptr, variant_col->size());
+                casted_variant_columns.push_back(casted_variant);
            }

            /// Second, collect all variants stored in shared variant and cast them to result type.
@ -4416,8 +4472,18 @@ private:
            for (size_t i = 0; i != variant_types_from_shared_variant.size(); ++i)
            {
                ColumnsWithTypeAndName variant = {{variant_columns_from_shared_variant[i]->getPtr(), variant_types_from_shared_variant[i], ""}};
-                auto variant_wrapper = prepareUnpackDictionaries(variant_types_from_shared_variant[i], result_type);
-                casted_shared_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_columns_from_shared_variant[i]->size()));
+                WrapperType variant_wrapper;
+                if (cast_type == CastType::accurateOrNull)
+                    /// Create wrapper only if we support conversion from variant to the resulting type.
+                    variant_wrapper = createWrapperIfCanConvert(variant_types_from_shared_variant[i], result_type);
+                else
+                    variant_wrapper = prepareUnpackDictionaries(variant_types_from_shared_variant[i], result_type);
+
+                ColumnPtr casted_variant;
+                /// Check if we have wrapper for this variant.
+                if (variant_wrapper)
+                    casted_variant = variant_wrapper(variant, result_type, nullptr, variant_columns_from_shared_variant[i]->size());
+                casted_shared_variant_columns.push_back(casted_variant);
            }

            /// Construct result column from all casted variants.
@ -4427,11 +4493,23 @@ private:
            {
                auto global_discr = variant_column.globalDiscriminatorByLocal(local_discriminators[i]);
                if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
+                {
                    res->insertDefault();
+                }
                else if (global_discr == shared_variant_discr)
-                    res->insertFrom(*casted_shared_variant_columns[shared_variant_indexes[i]], shared_variant_offsets[i]);
+                {
+                    if (casted_shared_variant_columns[shared_variant_indexes[i]])
+                        res->insertFrom(*casted_shared_variant_columns[shared_variant_indexes[i]], shared_variant_offsets[i]);
+                    else
+                        res->insertDefault();
+                }
                else
-                    res->insertFrom(*casted_variant_columns[global_discr], offsets[i]);
+                {
+                    if (casted_variant_columns[global_discr])
+                        res->insertFrom(*casted_variant_columns[global_discr], offsets[i]);
+                    else
+                        res->insertDefault();
+                }
            }

            return res;
--- a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp
+++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp
@ -406,7 +406,7 @@ void UserDefinedSQLObjectsZooKeeperStorage::syncObjects(const zkutil::ZooKeeperP
    LOG_DEBUG(log, "Syncing user-defined {} objects", object_type);
    Strings object_names = getObjectNamesAndSetWatch(zookeeper, object_type);

-    getLock();
+    auto lock = getLock();

    /// Remove stale objects
    removeAllObjectsExcept(object_names);
--- a/src/Functions/array/arrayZip.cpp
+++ b/src/Functions/array/arrayZip.cpp
@ -1,7 +1,8 @@
-#include <Columns/ColumnTuple.h>
 #include <Columns/ColumnArray.h>
-#include <DataTypes/DataTypeTuple.h>
+#include <Columns/ColumnNullable.h>
+#include <Columns/ColumnTuple.h>
 #include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeTuple.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionHelpers.h>
 #include <IO/WriteHelpers.h>
@ -12,23 +13,22 @@ namespace DB

 namespace ErrorCodes
 {
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    extern const int SIZES_OF_ARRAYS_DONT_MATCH;
-    extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
-    extern const int ILLEGAL_COLUMN;
+extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+extern const int SIZES_OF_ARRAYS_DONT_MATCH;
+extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
+extern const int ILLEGAL_COLUMN;
 }

 /// arrayZip(['a', 'b', 'c'], ['d', 'e', 'f']) = [('a', 'd'), ('b', 'e'), ('c', 'f')]
+/// arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e']) = [('a', 'd'), ('b', 'e'), ('c', null)]
+template <bool allow_unaligned>
 class FunctionArrayZip : public IFunction
 {
 public:
-    static constexpr auto name = "arrayZip";
+    static constexpr auto name = allow_unaligned ? "arrayZipUnaligned" : "arrayZip";
    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayZip>(); }

-    String getName() const override
-    {
-        return name;
-    }
+    String getName() const override { return name; }

    bool isVariadic() const override { return true; }
    size_t getNumberOfArguments() const override { return 0; }
@ -39,8 +39,11 @@ public:
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        if (arguments.empty())
-            throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
-                "Function {} needs at least one argument; passed {}." , getName(), arguments.size());
+            throw Exception(
+                ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
+                "Function {} needs at least one argument; passed {}.",
+                getName(),
+                arguments.size());

        DataTypes arguments_types;
        for (size_t index = 0; index < arguments.size(); ++index)
@ -48,56 +51,142 @@ public:
            const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[index].type.get());

            if (!array_type)
-                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array. Found {} instead.",
-                    toString(index + 1), getName(), arguments[0].type->getName());
+                throw Exception(
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Argument {} of function {} must be array. Found {} instead.",
+                    toString(index + 1),
+                    getName(),
+                    arguments[index].type->getName()); /// Name the type of the offending argument, not of the first one.

-            arguments_types.emplace_back(array_type->getNestedType());
+            auto nested_type = array_type->getNestedType();
+            if constexpr (allow_unaligned)
+                nested_type = makeNullable(nested_type);
+            arguments_types.emplace_back(nested_type);
        }

        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(arguments_types));
    }

-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    ColumnPtr
+    executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
    {
        size_t num_arguments = arguments.size();
-
-        ColumnPtr first_array_column;
+        Columns holders(num_arguments); /// Full (non-const) argument columns; holders[0] supplies the reference offsets.
        Columns tuple_columns(num_arguments);

+        bool has_unaligned = false; /// Set once any argument's offsets differ from those of the first argument.
+        size_t unaligned_index = 0; /// Index of the first such argument, used in the error message.
        for (size_t i = 0; i < num_arguments; ++i)
        {
            /// Constant columns cannot be inside tuple. It's only possible to have constant tuple as a whole.
            ColumnPtr holder = arguments[i].column->convertToFullColumnIfConst();
+            holders[i] = holder;

            const ColumnArray * column_array = checkAndGetColumn<ColumnArray>(holder.get());
-
            if (!column_array)
-                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument {} of function {} must be array. Found column {} instead.",
-                    i + 1, getName(), holder->getName());
-
-            if (i == 0)
-            {
-                first_array_column = holder;
-            }
-            else if (!column_array->hasEqualOffsets(static_cast<const ColumnArray &>(*first_array_column)))
-            {
-                throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
-                                "The argument 1 and argument {} of function {} have different array sizes",
-                                i + 1, getName());
-            }
-
+                throw Exception(
+                    ErrorCodes::ILLEGAL_COLUMN,
+                    "Argument {} of function {} must be array. Found column {} instead.",
+                    i + 1,
+                    getName(),
+                    holder->getName());
            tuple_columns[i] = column_array->getDataPtr();
+
+            if (i && !has_unaligned && !column_array->hasEqualOffsets(static_cast<const ColumnArray &>(*holders[0]))) /// Keep the first mismatch only; later comparisons are not needed.
+            {
+                has_unaligned = true;
+                unaligned_index = i;
+            }
        }

-        return ColumnArray::create(
-            ColumnTuple::create(tuple_columns), static_cast<const ColumnArray &>(*first_array_column).getOffsetsPtr());
+        if constexpr (!allow_unaligned) /// Strict variant: any size mismatch is an error.
+        {
+            if (has_unaligned)
+                throw Exception(
+                    ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
+                    "The argument 1 and argument {} of function {} have different array sizes",
+                    unaligned_index + 1,
+                    getName());
+            else
+                return ColumnArray::create(
+                    ColumnTuple::create(std::move(tuple_columns)), static_cast<const ColumnArray &>(*holders[0]).getOffsetsPtr());
+        }
+        else
+            return executeUnaligned(holders, tuple_columns, input_rows_count, has_unaligned);
+    }
+
+private:
+    ColumnPtr executeUnaligned(const Columns & holders, Columns & tuple_columns, size_t input_rows_count, bool has_unaligned) const
+    { /// Builds Array(Tuple(Nullable...)): per row, shorter arrays are padded with defaults up to the size of the longest one.
+        std::vector<const ColumnArray *> array_columns(holders.size());
+        for (size_t i = 0; i < holders.size(); ++i)
+            array_columns[i] = checkAndGetColumn<ColumnArray>(holders[i].get());
+
+        for (auto & tuple_column : tuple_columns)
+            tuple_column = makeNullable(tuple_column); /// Wrap the nested data columns in place (tuple_columns is an in/out parameter).
+
+        if (!has_unaligned) /// All offsets equal those of the first argument, so they are reused without copying any data.
+            return ColumnArray::create(ColumnTuple::create(std::move(tuple_columns)), array_columns[0]->getOffsetsPtr());
+
+        MutableColumns res_tuple_columns(tuple_columns.size());
+        for (size_t i = 0; i < tuple_columns.size(); ++i)
+        {
+            res_tuple_columns[i] = tuple_columns[i]->cloneEmpty();
+            res_tuple_columns[i]->reserve(tuple_columns[i]->size()); /// Lower bound only: padding may add more elements.
+        }
+
+        auto res_offsets_column = ColumnArray::ColumnOffsets::create(input_rows_count);
+        auto & res_offsets = assert_cast<ColumnArray::ColumnOffsets &>(*res_offsets_column).getData();
+        size_t curr_offset = 0;
+        for (size_t row_i = 0; row_i < input_rows_count; ++row_i)
+        {
+            size_t max_size = 0;
+            for (size_t arg_i = 0; arg_i < holders.size(); ++arg_i) /// Pass 1: copy each array's elements and find the longest array of the row.
+            {
+                const auto * array_column = array_columns[arg_i];
+                const auto & offsets = array_column->getOffsets();
+                size_t array_offset = offsets[row_i - 1]; /// NOTE(review): reads index -1 when row_i == 0; presumably the offsets container is padded so this yields 0 - confirm.
+                size_t array_size = offsets[row_i] - array_offset;
+
+                res_tuple_columns[arg_i]->insertRangeFrom(*tuple_columns[arg_i], array_offset, array_size);
+                max_size = std::max(max_size, array_size);
+            }
+
+            for (size_t arg_i = 0; arg_i < holders.size(); ++arg_i) /// Pass 2: pad every array of the row up to max_size.
+            {
+                const auto * array_column = array_columns[arg_i];
+                const auto & offsets = array_column->getOffsets();
+                size_t array_offset = offsets[row_i - 1];
+                size_t array_size = offsets[row_i] - array_offset;
+
+                res_tuple_columns[arg_i]->insertManyDefaults(max_size - array_size);
+            }
+
+            curr_offset += max_size;
+            res_offsets[row_i] = curr_offset; /// Result offsets are cumulative end positions.
+        }
+
+        return ColumnArray::create(ColumnTuple::create(std::move(res_tuple_columns)), std::move(res_offsets_column));
    }
 };

 REGISTER_FUNCTION(ArrayZip)
 {
-    factory.registerFunction<FunctionArrayZip>();
+    factory.registerFunction<FunctionArrayZip<false>>( /// arrayZip: strict variant, array sizes must match.
+        {.description = R"(
+Combines multiple arrays into a single array. The resulting array contains the corresponding elements of the source arrays grouped into tuples in the listed order of arguments.
+)",
+         .categories{"Array"}});
+
+    factory.registerFunction<FunctionArrayZip<true>>( /// arrayZipUnaligned: shorter arrays are padded.
+        {.description = R"(
+Combines multiple arrays into a single array, allowing for unaligned arrays. The resulting array contains the corresponding elements of the source arrays grouped into tuples in the listed order of arguments.
+
+If the arrays have different sizes, the shorter arrays will be padded with `null` values.
+)",
+         .categories{"Array"}}
+
+    );
 }

 }
-
--- a/src/Functions/date_trunc.cpp
+++ b/src/Functions/date_trunc.cpp
@ -2,6 +2,7 @@
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnsNumber.h>
 #include <DataTypes/DataTypeDate.h>
+#include <DataTypes/DataTypeDate32.h>
 #include <DataTypes/DataTypeDateTime.h>
 #include <DataTypes/DataTypeInterval.h>
 #include <Formats/FormatSettings.h>
@ -43,6 +44,7 @@ public:
        enum ResultType
        {
            Date,
+            Date32,
            DateTime,
            DateTime64,
        };
@ -75,15 +77,15 @@ public:

        bool second_argument_is_date = false;
        auto check_second_argument = [&] {
-            if (!isDate(arguments[1].type) && !isDateTime(arguments[1].type) && !isDateTime64(arguments[1].type))
+            if (!isDateOrDate32(arguments[1].type) && !isDateTime(arguments[1].type) && !isDateTime64(arguments[1].type))
                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}. "
                    "Should be a date or a date with time", arguments[1].type->getName(), getName());

-            second_argument_is_date = isDate(arguments[1].type);
+            second_argument_is_date = isDateOrDate32(arguments[1].type);

            if (second_argument_is_date && ((datepart_kind == IntervalKind::Kind::Hour)
                || (datepart_kind == IntervalKind::Kind::Minute) || (datepart_kind == IntervalKind::Kind::Second)))
-                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type Date of argument for function {}", getName());
+                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for function {}", arguments[1].type->getName(), getName());
        };

        auto check_timezone_argument = [&] {
@ -119,6 +121,8 @@ public:

        if (result_type == ResultType::Date)
            return std::make_shared<DataTypeDate>();
+        if (result_type == ResultType::Date32)
+            return std::make_shared<DataTypeDate32>();
        else if (result_type == ResultType::DateTime)
            return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 1, false));
        else
--- a/src/Functions/toStartOfInterval.cpp
+++ b/src/Functions/toStartOfInterval.cpp
@ -44,9 +44,9 @@ public:
        auto check_first_argument = [&]
        {
            const DataTypePtr & type_arg1 = arguments[0].type;
-            if (!isDate(type_arg1) && !isDateTime(type_arg1) && !isDateTime64(type_arg1))
+            if (!isDateOrDate32(type_arg1) && !isDateTime(type_arg1) && !isDateTime64(type_arg1))
                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                    "Illegal type {} of 1st argument of function {}, expected a Date, DateTime or DateTime64",
+                    "Illegal type {} of 1st argument of function {}, expected a Date, Date32, DateTime or DateTime64",
                    type_arg1->getName(), getName());
            value_is_date = isDate(type_arg1);
        };
@ -56,6 +56,7 @@ public:
        enum class ResultType : uint8_t
        {
            Date,
+            Date32,
            DateTime,
            DateTime64
        };
@ -128,6 +129,8 @@ public:
        {
            case ResultType::Date:
                return std::make_shared<DataTypeDate>();
+            case ResultType::Date32:
+                return std::make_shared<DataTypeDate32>();
            case ResultType::DateTime:
                return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false));
            case ResultType::DateTime64:
@ -185,7 +188,13 @@ private:
            if (time_column_vec)
                return dispatchForIntervalColumn(assert_cast<const DataTypeDate &>(time_column_type), *time_column_vec, interval_column, result_type, time_zone, input_rows_count);
        }
-        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, DateTime or DateTime64", getName());
+        else if (isDate32(time_column_type))
+        {
+            const auto * time_column_vec = checkAndGetColumn<ColumnDate32>(&time_column_col);
+            if (time_column_vec)
+                return dispatchForIntervalColumn(assert_cast<const DataTypeDate32 &>(time_column_type), *time_column_vec, interval_column, result_type, time_zone, input_rows_count);
+        }
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, Date32, DateTime or DateTime64", getName());
    }

    template <typename TimeDataType, typename TimeColumnType>
--- a/src/IO/Progress.cpp
+++ b/src/IO/Progress.cpp
@ -91,6 +91,8 @@ void ProgressValues::writeJSON(WriteBuffer & out) const
    writeText(result_bytes, out);
    writeCString("\",\"elapsed_ns\":\"", out);
    writeText(elapsed_ns, out);
+    writeCString("\",\"real_time_microseconds\":\"", out);
+    writeText(real_time_microseconds, out);
    writeCString("\"", out);
    writeCString("}", out);
 }
@ -110,6 +112,7 @@ bool Progress::incrementPiecewiseAtomically(const Progress & rhs)
    result_bytes += rhs.result_bytes;

    elapsed_ns += rhs.elapsed_ns;
+    real_time_microseconds += rhs.real_time_microseconds;

    return rhs.read_rows || rhs.written_rows;
 }
@ -129,6 +132,7 @@ void Progress::reset()
    result_bytes = 0;

    elapsed_ns = 0;
+    real_time_microseconds = 0;
 }

 ProgressValues Progress::getValues() const
@ -148,6 +152,7 @@ ProgressValues Progress::getValues() const
    res.result_bytes = result_bytes.load(std::memory_order_relaxed);

    res.elapsed_ns = elapsed_ns.load(std::memory_order_relaxed);
+    res.real_time_microseconds = real_time_microseconds.load(std::memory_order_relaxed);

    return res;
 }
@ -169,6 +174,7 @@ ProgressValues Progress::fetchValuesAndResetPiecewiseAtomically()
    res.result_bytes = result_bytes.fetch_and(0);

    res.elapsed_ns = elapsed_ns.fetch_and(0);
+    res.real_time_microseconds = real_time_microseconds.fetch_and(0);

    return res;
 }
@ -190,6 +196,7 @@ Progress Progress::fetchAndResetPiecewiseAtomically()
    res.result_bytes = result_bytes.fetch_and(0);

    res.elapsed_ns = elapsed_ns.fetch_and(0);
+    res.real_time_microseconds = real_time_microseconds.fetch_and(0);

    return res;
 }
@ -209,6 +216,7 @@ Progress & Progress::operator=(Progress && other) noexcept
    result_bytes = other.result_bytes.load(std::memory_order_relaxed);

    elapsed_ns = other.elapsed_ns.load(std::memory_order_relaxed);
+    real_time_microseconds = other.real_time_microseconds.load(std::memory_order_relaxed);

    return *this;
 }
@ -244,4 +252,9 @@ void Progress::incrementElapsedNs(UInt64 elapsed_ns_)
    elapsed_ns.fetch_add(elapsed_ns_, std::memory_order_relaxed);
 }

+void Progress::incrementRealTimeMicroseconds(UInt64 microseconds)
+{
+    real_time_microseconds.fetch_add(microseconds, std::memory_order_relaxed); /// Atomic add with relaxed ordering, same pattern as incrementElapsedNs.
+}
+
 }
--- a/src/IO/Progress.h
+++ b/src/IO/Progress.h
@ -28,6 +28,7 @@ struct ProgressValues
    UInt64 result_bytes = 0;

    UInt64 elapsed_ns = 0;
+    UInt64 real_time_microseconds = 0;

    void read(ReadBuffer & in, UInt64 server_revision);
    void write(WriteBuffer & out, UInt64 client_revision) const;
@ -40,6 +41,7 @@ struct ReadProgress
    UInt64 read_bytes = 0;
    UInt64 total_rows_to_read = 0;
    UInt64 total_bytes_to_read = 0;
+    UInt64 real_time_microseconds = 0;

    ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0)
        : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {}
@ -96,6 +98,8 @@ struct Progress

    std::atomic<UInt64> elapsed_ns {0};

+    std::atomic<UInt64> real_time_microseconds {0};
+
    Progress() = default;

    Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0)
@ -125,6 +129,8 @@ struct Progress

    void incrementElapsedNs(UInt64 elapsed_ns_);

+    void incrementRealTimeMicroseconds(UInt64 microseconds);
+
    void reset();

    ProgressValues getValues() const;
--- a/src/IO/ReadBufferFromS3.cpp
+++ b/src/IO/ReadBufferFromS3.cpp
@ -6,7 +6,6 @@

 #include <IO/ReadBufferFromIStream.h>
 #include <IO/ReadBufferFromS3.h>
-#include <Common/Scheduler/ResourceGuard.h>
 #include <IO/S3/getObjectInfo.h>
 #include <IO/S3/Requests.h>

@ -423,22 +422,13 @@ Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t attempt, si
    ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ReadBufferFromS3InitMicroseconds);

    // We do not know in advance how many bytes we are going to consume, to avoid blocking estimated it from below
-    constexpr ResourceCost estimated_cost = 1;
-    ResourceGuard rlock(read_settings.resource_link, estimated_cost);
-
+    CurrentThread::IOScope io_scope(read_settings.io_scheduling);
    Aws::S3::Model::GetObjectOutcome outcome = client_ptr->GetObject(req);

-    rlock.unlock();
-
    if (outcome.IsSuccess())
-    {
-        ResourceCost bytes_read = outcome.GetResult().GetContentLength();
-        read_settings.resource_link.adjust(estimated_cost, bytes_read);
        return outcome.GetResultWithOwnership();
-    }
    else
    {
-        read_settings.resource_link.accumulate(estimated_cost);
        const auto & error = outcome.GetError();
        throw S3Exception(error.GetMessage(), error.GetErrorType());
    }
--- a/src/IO/ReadSettings.h
+++ b/src/IO/ReadSettings.h
@ -118,8 +118,7 @@ struct ReadSettings
    ThrottlerPtr remote_throttler;
    ThrottlerPtr local_throttler;

-    // Resource to be used during reading
-    ResourceLink resource_link;
+    IOSchedulingSettings io_scheduling;

    size_t http_max_tries = 10;
    size_t http_retry_initial_backoff_ms = 100;
--- a/src/IO/WriteBufferFromS3.cpp
+++ b/src/IO/WriteBufferFromS3.cpp
@ -11,7 +11,6 @@
 #include <Common/Throttler.h>
 #include <Interpreters/Cache/FileCache.h>

-#include <Common/Scheduler/ResourceGuard.h>
 #include <IO/WriteHelpers.h>
 #include <IO/S3Common.h>
 #include <IO/S3/Requests.h>
@ -558,12 +557,11 @@ void WriteBufferFromS3::writePart(WriteBufferFromS3::PartData && data)

        auto & request = std::get<0>(*worker_data);

-        ResourceCost cost = request.GetContentLength();
-        ResourceGuard rlock(write_settings.resource_link, cost);
+        CurrentThread::IOScope io_scope(write_settings.io_scheduling);
+
        Stopwatch watch;
        auto outcome = client_ptr->UploadPart(request);
        watch.stop();
-        rlock.unlock(); // Avoid acquiring other locks under resource lock

        ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds());

@ -577,7 +575,6 @@ void WriteBufferFromS3::writePart(WriteBufferFromS3::PartData && data)
        if (!outcome.IsSuccess())
        {
            ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
-            write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure
            throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
        }

@ -715,12 +712,11 @@ void WriteBufferFromS3::makeSinglepartUpload(WriteBufferFromS3::PartData && data
            if (client_ptr->isClientForDisk())
                ProfileEvents::increment(ProfileEvents::DiskS3PutObject);

-            ResourceCost cost = request.GetContentLength();
-            ResourceGuard rlock(write_settings.resource_link, cost);
+            CurrentThread::IOScope io_scope(write_settings.io_scheduling);
+
            Stopwatch watch;
            auto outcome = client_ptr->PutObject(request);
            watch.stop();
-            rlock.unlock();

            ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds());
            if (blob_log)
@ -734,7 +730,6 @@ void WriteBufferFromS3::makeSinglepartUpload(WriteBufferFromS3::PartData && data
            }

            ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
-            write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure

            if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY)
            {
--- a/src/IO/WriteSettings.h
+++ b/src/IO/WriteSettings.h
@ -13,8 +13,7 @@ struct WriteSettings
    ThrottlerPtr remote_throttler;
    ThrottlerPtr local_throttler;

-    // Resource to be used during reading
-    ResourceLink resource_link;
+    IOSchedulingSettings io_scheduling;

    /// Filesystem cache settings
    bool enable_filesystem_cache_on_write_operations = false;
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@ -396,7 +396,7 @@ const ActionsDAG::Node * ActionsDAG::tryFindInOutputs(const std::string & name)
    return nullptr;
 }

-ActionsDAG::NodeRawConstPtrs ActionsDAG::findInOutpus(const Names & names) const
+ActionsDAG::NodeRawConstPtrs ActionsDAG::findInOutputs(const Names & names) const
 {
    NodeRawConstPtrs required_nodes;
    required_nodes.reserve(names.size());
@ -524,7 +524,7 @@ void ActionsDAG::removeUnusedActions(const NameSet & required_names, bool allow_

 void ActionsDAG::removeUnusedActions(const Names & required_names, bool allow_remove_inputs, bool allow_constant_folding)
 {
-    auto required_nodes = findInOutpus(required_names);
+    auto required_nodes = findInOutputs(required_names);
    outputs.swap(required_nodes);
    removeUnusedActions(allow_remove_inputs, allow_constant_folding);
 }
--- a/src/Interpreters/ActionsDAG.h
+++ b/src/Interpreters/ActionsDAG.h
@ -156,7 +156,7 @@ public:
    const Node * tryFindInOutputs(const std::string & name) const;

    /// Same, but for the list of names.
-    NodeRawConstPtrs findInOutpus(const Names & names) const;
+    NodeRawConstPtrs findInOutputs(const Names & names) const;

    /// Find first node with the same name in output nodes and replace it.
    /// If was not found, add node to outputs end.
@ -436,7 +436,7 @@ public:
    /// Returns a list of nodes representing atomic predicates.
    static NodeRawConstPtrs extractConjunctionAtoms(const Node * predicate);

-    /// Get a list of nodes. For every node, check if it can be compused using allowed subset of inputs.
+    /// Get a list of nodes. For every node, check if it can be computed using allowed subset of inputs.
    /// Returns only those nodes from the list which can be computed.
    static NodeRawConstPtrs filterNodesByAllowedInputs(
        NodeRawConstPtrs nodes,
--- a/src/Interpreters/AsynchronousInsertQueue.cpp
+++ b/src/Interpreters/AsynchronousInsertQueue.cpp
@ -33,6 +33,8 @@
 #include <Common/SensitiveDataMasker.h>
 #include <Common/SipHash.h>
 #include <Common/logger_useful.h>
+#include <Parsers/ASTExpressionList.h>
+#include <Parsers/ASTIdentifier.h>

 namespace CurrentMetrics
 {
@ -308,16 +310,32 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const
        /* no_squash */ false,
        /* no_destination */ false,
        /* async_insert */ false);
+
    auto table = interpreter.getTable(insert_query);
    auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context);

    if (!FormatFactory::instance().isInputFormat(insert_query.format))
-        throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input format {}", insert_query.format);
+    {
+        if (insert_query.format.empty() && insert_query.infile)
+        {
+            const auto & in_file_node = insert_query.infile->as<ASTLiteral &>();
+            const auto in_file = in_file_node.value.safeGet<std::string>();
+            const auto in_file_format = FormatFactory::instance().getFormatFromFileName(in_file);
+            if (!FormatFactory::instance().isInputFormat(in_file_format))
+                throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input INFILE format {}", in_file_format);
+        }
+        else
+            throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input format {}", insert_query.format);
+    }

    /// For table functions we check access while executing
    /// InterpreterInsertQuery::getTable() -> ITableFunction::execute().
    if (insert_query.table_id)
        query_context->checkAccess(AccessType::INSERT, insert_query.table_id, sample_block.getNames());
+
+    insert_query.columns = std::make_shared<ASTExpressionList>();
+    for (const auto & column : sample_block)
+        insert_query.columns->children.push_back(std::make_shared<ASTIdentifier>(column.name));
 }

 AsynchronousInsertQueue::PushResult
@ -696,6 +714,17 @@ catch (...)
    tryLogCurrentException("AsynchronousInsertQueue", "Failed to add elements to AsynchronousInsertLog");
 }

+void convertBlockToHeader(Block & block, const Block & header)
+{ /// Builds converting actions from the structure of `block` to that of `header` and executes them on `block`.
+    auto converting_dag = ActionsDAG::makeConvertingActions(
+        block.getColumnsWithTypeAndName(),
+        header.getColumnsWithTypeAndName(),
+        ActionsDAG::MatchColumnsMode::Name); /// Columns are matched by name.
+
+    auto converting_actions = std::make_shared<ExpressionActions>(std::move(converting_dag));
+    converting_actions->execute(block);
+}
+
 String serializeQuery(const IAST & query, size_t max_length)
 {
    return query.hasSecretParts()
@ -791,6 +820,61 @@ try
    if (async_insert_log)
        log_elements.reserve(data->entries.size());

+    auto add_entry_to_asynchronous_insert_log = [&, query_by_format = NameToNameMap{}]( /// query_by_format caches the serialized query per format between calls, hence `mutable`.
+        const InsertData::EntryPtr & entry,
+        const String & parsing_exception,
+        size_t num_rows,
+        size_t num_bytes) mutable
+    {
+        if (!async_insert_log)
+            return;
+
+        AsynchronousInsertLogElement elem;
+        elem.event_time = timeInSeconds(entry->create_time);
+        elem.event_time_microseconds = timeInMicroseconds(entry->create_time);
+        elem.database = query_database;
+        elem.table = query_table;
+        elem.format = entry->format;
+        elem.query_id = entry->query_id;
+        elem.bytes = num_bytes;
+        elem.rows = num_rows;
+        elem.exception = parsing_exception;
+        elem.data_kind = entry->chunk.getDataKind();
+        elem.timeout_milliseconds = data->timeout_ms.count();
+        elem.flush_query_id = insert_query_id;
+
+        auto get_query_by_format = [&](const String & format) -> const String &
+        {
+            auto [it, inserted] = query_by_format.try_emplace(format);
+            if (!inserted)
+                return it->second; /// Cache hit: this format was already serialized.
+
+            auto query = key.query->clone();
+            assert_cast<ASTInsertQuery &>(*query).format = format;
+            it->second = serializeQuery(*query, insert_context->getSettingsRef().log_queries_cut_to_length);
+            return it->second;
+        };
+
+        if (entry->chunk.getDataKind() == DataKind::Parsed) /// Parsed entries log the key's query string as is; others get the query re-serialized with the entry's own format.
+            elem.query_for_logging = key.query_str;
+        else
+            elem.query_for_logging = get_query_by_format(entry->format);
+
+        /// If there was a parsing error,
+        /// the entry won't be flushed anyway,
+        /// so add the log element immediately.
+        if (!elem.exception.empty())
+        {
+            elem.status = AsynchronousInsertLogElement::ParsingError;
+            async_insert_log->add(std::move(elem));
+        }
+        else
+        {
+            elem.status = AsynchronousInsertLogElement::Ok;
+            log_elements.push_back(std::move(elem)); /// Queued in log_elements instead of being added to the log here.
+        }
+    };
+
    try
    {
        interpreter = std::make_unique<InterpreterInsertQuery>(
@ -819,49 +903,20 @@ try
    catch (...)
    {
        logExceptionBeforeStart(query_for_logging, insert_context, key.query, query_span, start_watch.elapsedMilliseconds());
+
+        if (async_insert_log)
+        {
+            for (const auto & entry : data->entries)
+                add_entry_to_asynchronous_insert_log(entry, /*parsing_exception=*/ "", /*num_rows=*/ 0, entry->chunk.byteSize());
+
+            auto exception = getCurrentExceptionMessage(false);
+            auto flush_time = std::chrono::system_clock::now();
+            appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, exception);
+        }
        throw;
    }

-    auto add_entry_to_asynchronous_insert_log = [&](const auto & entry,
-                                                    const auto & entry_query_for_logging,
-                                                    const auto & exception,
-                                                    size_t num_rows,
-                                                    size_t num_bytes,
-                                                    Milliseconds timeout_ms)
-    {
-        if (!async_insert_log)
-            return;
-
-        AsynchronousInsertLogElement elem;
-        elem.event_time = timeInSeconds(entry->create_time);
-        elem.event_time_microseconds = timeInMicroseconds(entry->create_time);
-        elem.query_for_logging = entry_query_for_logging;
-        elem.database = query_database;
-        elem.table = query_table;
-        elem.format = entry->format;
-        elem.query_id = entry->query_id;
-        elem.bytes = num_bytes;
-        elem.rows = num_rows;
-        elem.exception = exception;
-        elem.data_kind = entry->chunk.getDataKind();
-        elem.timeout_milliseconds = timeout_ms.count();
-        elem.flush_query_id = insert_query_id;
-
-        /// If there was a parsing error,
-        /// the entry won't be flushed anyway,
-        /// so add the log element immediately.
-        if (!elem.exception.empty())
-        {
-            elem.status = AsynchronousInsertLogElement::ParsingError;
-            async_insert_log->add(std::move(elem));
-        }
-        else
-        {
-            log_elements.push_back(elem);
-        }
-    };
-
-    auto finish_entries = [&]
+    auto finish_entries = [&](size_t num_rows, size_t num_bytes)
    {
        for (const auto & entry : data->entries)
        {
@ -874,20 +929,7 @@ try
            auto flush_time = std::chrono::system_clock::now();
            appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, "");
        }
-    };

-    Chunk chunk;
-    auto header = pipeline.getHeader();
-
-    if (key.data_kind == DataKind::Parsed)
-        chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_asynchronous_insert_log);
-    else
-        chunk = processPreprocessedEntries(key, data, header, insert_context, add_entry_to_asynchronous_insert_log);
-
-    ProfileEvents::increment(ProfileEvents::AsyncInsertRows, chunk.getNumRows());
-
-    auto log_and_add_finish_to_query_log = [&](size_t num_rows, size_t num_bytes)
-    {
        LOG_DEBUG(log, "Flushed {} rows, {} bytes for query '{}'", num_rows, num_bytes, key.query_str);
        queue_shard_flush_time_history.updateWithCurrentTime();

@ -896,16 +938,24 @@ try
            query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal);
    };

-
-    if (chunk.getNumRows() == 0)
-    {
-        finish_entries();
-        log_and_add_finish_to_query_log(0, 0);
-        return;
-    }
-
    try
    {
+        Chunk chunk;
+        auto header = pipeline.getHeader();
+
+        if (key.data_kind == DataKind::Parsed)
+            chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_asynchronous_insert_log);
+        else
+            chunk = processPreprocessedEntries(data, header, add_entry_to_asynchronous_insert_log);
+
+        ProfileEvents::increment(ProfileEvents::AsyncInsertRows, chunk.getNumRows());
+
+        if (chunk.getNumRows() == 0)
+        {
+            finish_entries(/*num_rows=*/ 0, /*num_bytes=*/ 0);
+            return;
+        }
+
        size_t num_rows = chunk.getNumRows();
        size_t num_bytes = chunk.bytes();

@ -915,7 +965,7 @@ try
        CompletedPipelineExecutor completed_executor(pipeline);
        completed_executor.execute();

-        log_and_add_finish_to_query_log(num_rows, num_bytes);
+        finish_entries(num_rows, num_bytes);
    }
    catch (...)
    {
@ -929,8 +979,6 @@ try
        }
        throw;
    }
-
-    finish_entries();
 }
 catch (const Exception & e)
 {
@ -991,7 +1039,6 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(

    StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform));
    auto chunk_info = std::make_shared<AsyncInsertInfo>();
-    auto query_for_logging = serializeQuery(*key.query, insert_context->getSettingsRef().log_queries_cut_to_length);

    for (const auto & entry : data->entries)
    {
@ -1009,11 +1056,17 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
        size_t num_rows = executor.execute(*buffer);

        total_rows += num_rows;
-        chunk_info->offsets.push_back(total_rows);
-        chunk_info->tokens.push_back(entry->async_dedup_token);

-        add_to_async_insert_log(entry, query_for_logging, current_exception, num_rows, num_bytes, data->timeout_ms);
+        /// For some reason, client can pass zero rows and bytes to server.
+        /// We don't update offsets in this case, because we assume every insert has some rows during dedup
+        /// but we have nothing to deduplicate for this insert.
+        if (num_rows > 0)
+        {
+            chunk_info->offsets.push_back(total_rows);
+            chunk_info->tokens.push_back(entry->async_dedup_token);
+        }

+        add_to_async_insert_log(entry, current_exception, num_rows, num_bytes);
        current_exception.clear();
        entry->resetChunk();
    }
@ -1025,30 +1078,14 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(

 template <typename LogFunc>
 Chunk AsynchronousInsertQueue::processPreprocessedEntries(
-    const InsertQuery & key,
    const InsertDataPtr & data,
    const Block & header,
-    const ContextPtr & insert_context,
    LogFunc && add_to_async_insert_log)
 {
    size_t total_rows = 0;
    auto chunk_info = std::make_shared<AsyncInsertInfo>();
    auto result_columns = header.cloneEmptyColumns();

-    std::unordered_map<String, String> format_to_query;
-
-    auto get_query_by_format = [&](const String & format) -> const String &
-    {
-        auto [it, inserted] = format_to_query.try_emplace(format);
-        if (!inserted)
-            return it->second;
-
-        auto query = key.query->clone();
-        assert_cast<ASTInsertQuery &>(*query).format = format;
-        it->second = serializeQuery(*query, insert_context->getSettingsRef().log_queries_cut_to_length);
-        return it->second;
-    };
-
    for (const auto & entry : data->entries)
    {
        const auto * block = entry->chunk.asBlock();
@ -1056,17 +1093,26 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries(
            throw Exception(ErrorCodes::LOGICAL_ERROR,
                "Expected entry with data kind Preprocessed. Got: {}", entry->chunk.getDataKind());

-        auto columns = block->getColumns();
+        Block block_to_insert = *block;
+        if (!isCompatibleHeader(block_to_insert, header))
+            convertBlockToHeader(block_to_insert, header);
+
+        auto columns = block_to_insert.getColumns();
        for (size_t i = 0, s = columns.size(); i < s; ++i)
            result_columns[i]->insertRangeFrom(*columns[i], 0, columns[i]->size());

-        total_rows += block->rows();
-        chunk_info->offsets.push_back(total_rows);
-        chunk_info->tokens.push_back(entry->async_dedup_token);
+        total_rows += block_to_insert.rows();

-        const auto & query_for_logging = get_query_by_format(entry->format);
-        add_to_async_insert_log(entry, query_for_logging, "", block->rows(), block->bytes(), data->timeout_ms);
+        /// For some reason, client can pass zero rows and bytes to server.
+        /// We don't update offsets in this case, because we assume every insert has some rows during dedup,
+        /// but we have nothing to deduplicate for this insert.
+        if (block_to_insert.rows() > 0)
+        {
+            chunk_info->offsets.push_back(total_rows);
+            chunk_info->tokens.push_back(entry->async_dedup_token);
+        }

+        add_to_async_insert_log(entry, /*parsing_exception=*/ "", block_to_insert.rows(), block_to_insert.bytes());
        entry->resetChunk();
    }

--- a/src/Interpreters/AsynchronousInsertQueue.h
+++ b/src/Interpreters/AsynchronousInsertQueue.h
@ -288,10 +288,8 @@ private:

    template <typename LogFunc>
    static Chunk processPreprocessedEntries(
-        const InsertQuery & key,
        const InsertDataPtr & data,
        const Block & header,
-        const ContextPtr & insert_context,
        LogFunc && add_to_async_insert_log);

    template <typename E>
--- a/src/Interpreters/InterpreterDeleteQuery.cpp
+++ b/src/Interpreters/InterpreterDeleteQuery.cpp
@ -107,7 +107,9 @@ BlockIO InterpreterDeleteQuery::execute()
        String alter_query =
            "ALTER TABLE " + table->getStorageID().getFullTableName()
            + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster))
-            + " UPDATE `_row_exists` = 0 WHERE " + serializeAST(*delete_query.predicate);
+            + " UPDATE `_row_exists` = 0"
+            + (delete_query.partition ? " IN PARTITION " + serializeAST(*delete_query.partition) : "")
+            + " WHERE " + serializeAST(*delete_query.predicate);

        ParserAlterQuery parser;
        ASTPtr alter_ast = parseQuery(
--- a/src/Interpreters/Lemmatizers.cpp
+++ b/src/Interpreters/Lemmatizers.cpp
@ -33,25 +33,19 @@ public:
    }
 };

-/// Duplicate of code from StringUtils.h. Copied here for less dependencies.
-static bool startsWith(const std::string & s, const char * prefix)
-{
-    return s.size() >= strlen(prefix) && 0 == memcmp(s.data(), prefix, strlen(prefix));
-}
-
 Lemmatizers::Lemmatizers(const Poco::Util::AbstractConfiguration & config)
 {
-    String prefix = "lemmatizers";
-    Poco::Util::AbstractConfiguration::Keys keys;
+    const String prefix = "lemmatizers";

    if (!config.has(prefix))
-        throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "No lemmatizers specified in server config on prefix '{}'", prefix);
+        return;

+    Poco::Util::AbstractConfiguration::Keys keys;
    config.keys(prefix, keys);

    for (const auto & key : keys)
    {
-        if (startsWith(key, "lemmatizer"))
+        if (key.starts_with("lemmatizer"))
        {
            const auto & lemm_name = config.getString(prefix + "." + key + ".lang", "");
            const auto & lemm_path = config.getString(prefix + "." + key + ".path", "");
@ -81,13 +75,13 @@ Lemmatizers::LemmPtr Lemmatizers::getLemmatizer(const String & name)
    if (paths.find(name) != paths.end())
    {
        if (!std::filesystem::exists(paths[name]))
-            throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Incorrect path to lemmatizer: {}", paths[name]);
+            throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Path to lemmatizer does not exist: {}", paths[name]);

        lemmatizers[name] = std::make_shared<Lemmatizer>(paths[name]);
        return lemmatizers[name];
    }

-    throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Lemmatizer named: '{}' is not found", name);
+    throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Lemmatizer with the name '{}' was not found in the configuration", name);
 }

 }
--- a/src/Interpreters/MutationsInterpreter.cpp
+++ b/src/Interpreters/MutationsInterpreter.cpp
@ -147,6 +147,7 @@ ColumnDependencies getAllColumnDependencies(

 bool isStorageTouchedByMutations(
    MergeTreeData::DataPartPtr source_part,
+    MergeTreeData::MutationsSnapshotPtr mutations_snapshot,
    const StorageMetadataPtr & metadata_snapshot,
    const std::vector<MutationCommand> & commands,
    ContextPtr context)
@ -154,7 +155,7 @@ bool isStorageTouchedByMutations(
    if (commands.empty())
        return false;

-    auto storage_from_part = std::make_shared<StorageFromMergeTreeDataPart>(source_part);
+    auto storage_from_part = std::make_shared<StorageFromMergeTreeDataPart>(source_part, mutations_snapshot);
    bool all_commands_can_be_skipped = true;

    for (const auto & command : commands)
@ -285,8 +286,13 @@ MutationsInterpreter::Source::Source(StoragePtr storage_) : storage(std::move(st
 {
 }

-MutationsInterpreter::Source::Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_)
-    : data(&storage_), part(std::move(source_part_))
+MutationsInterpreter::Source::Source(
+    MergeTreeData & storage_,
+    MergeTreeData::DataPartPtr source_part_,
+    AlterConversionsPtr alter_conversions_)
+    : data(&storage_)
+    , part(std::move(source_part_))
+    , alter_conversions(std::move(alter_conversions_))
 {
 }

@ -386,13 +392,14 @@ MutationsInterpreter::MutationsInterpreter(
 MutationsInterpreter::MutationsInterpreter(
    MergeTreeData & storage_,
    MergeTreeData::DataPartPtr source_part_,
+    AlterConversionsPtr alter_conversions_,
    StorageMetadataPtr metadata_snapshot_,
    MutationCommands commands_,
    Names available_columns_,
    ContextPtr context_,
    Settings settings_)
    : MutationsInterpreter(
-        Source(storage_, std::move(source_part_)),
+        Source(storage_, std::move(source_part_), std::move(alter_conversions_)),
        std::move(metadata_snapshot_), std::move(commands_),
        std::move(available_columns_), std::move(context_), std::move(settings_))
 {
@ -1218,7 +1225,7 @@ void MutationsInterpreter::Source::read(
        createReadFromPartStep(
            MergeTreeSequentialSourceType::Mutation,
            plan, *data, storage_snapshot,
-            part, required_columns,
+            part, alter_conversions, required_columns,
            apply_deleted_mask_, std::move(filter), context_,
            getLogger("MutationsInterpreter"));
    }
--- a/src/Interpreters/MutationsInterpreter.h
+++ b/src/Interpreters/MutationsInterpreter.h
@ -20,6 +20,7 @@ using QueryPipelineBuilderPtr = std::unique_ptr<QueryPipelineBuilder>;
 /// Return false if the data isn't going to be changed by mutations.
 bool isStorageTouchedByMutations(
    MergeTreeData::DataPartPtr source_part,
+    MergeTreeData::MutationsSnapshotPtr mutations_snapshot,
    const StorageMetadataPtr & metadata_snapshot,
    const std::vector<MutationCommand> & commands,
    ContextPtr context
@ -70,6 +71,7 @@ public:
    MutationsInterpreter(
        MergeTreeData & storage_,
        MergeTreeData::DataPartPtr source_part_,
+        AlterConversionsPtr alter_conversions_,
        StorageMetadataPtr metadata_snapshot_,
        MutationCommands commands_,
        Names available_columns_,
@ -137,7 +139,7 @@ public:
            bool can_execute_) const;

        explicit Source(StoragePtr storage_);
-        Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_);
+        Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_, AlterConversionsPtr alter_conversions_);

    private:
        StoragePtr storage;
@ -145,6 +147,7 @@ public:
        /// Special case for *MergeTree.
        MergeTreeData * data = nullptr;
        MergeTreeData::DataPartPtr part;
+        AlterConversionsPtr alter_conversions;
    };

 private:
--- a/src/Interpreters/ProcessorsProfileLog.cpp
+++ b/src/Interpreters/ProcessorsProfileLog.cpp
@ -30,6 +30,8 @@ ColumnsDescription ProcessorProfileLogElement::getColumnsDescription()
        {"id", std::make_shared<DataTypeUInt64>(), "ID of processor."},
        {"parent_ids", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>()), "Parent processors IDs."},
        {"plan_step", std::make_shared<DataTypeUInt64>(), "ID of the query plan step which created this processor. The value is zero if the processor was not added from any step."},
+        {"plan_step_name", std::make_shared<DataTypeString>(), "Name of the query plan step which created this processor. The value is empty if the processor was not added from any step."},
+        {"plan_step_description", std::make_shared<DataTypeString>(), "Description of the query plan step which created this processor. The value is empty if the processor was not added from any step."},
        {"plan_group", std::make_shared<DataTypeUInt64>(), "Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. Group is used only for beautifying the result of EXPLAIN PIPELINE result."},

        {"initial_query_id", std::make_shared<DataTypeString>(), "ID of the initial query (for distributed query execution)."},
@ -64,6 +66,8 @@ void ProcessorProfileLogElement::appendToBlock(MutableColumns & columns) const
    }

    columns[i++]->insert(plan_step);
+    columns[i++]->insert(plan_step_name);
+    columns[i++]->insert(plan_step_description);
    columns[i++]->insert(plan_group);
    columns[i++]->insertData(initial_query_id.data(), initial_query_id.size());
    columns[i++]->insertData(query_id.data(), query_id.size());
--- a/src/Interpreters/ProcessorsProfileLog.h
+++ b/src/Interpreters/ProcessorsProfileLog.h
@ -19,6 +19,8 @@ struct ProcessorProfileLogElement

    UInt64 plan_step{};
    UInt64 plan_group{};
+    String plan_step_name;
+    String plan_step_description;

    String initial_query_id;
    String query_id;
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@ -90,6 +90,7 @@ namespace ProfileEvents
    extern const Event SelectQueryTimeMicroseconds;
    extern const Event InsertQueryTimeMicroseconds;
    extern const Event OtherQueryTimeMicroseconds;
+    extern const Event RealTimeMicroseconds;
 }

 namespace DB
@ -398,9 +399,14 @@ void logQueryFinish(
        /// Update performance counters before logging to query_log
        CurrentThread::finalizePerformanceCounters();

-        QueryStatusInfo info = process_list_elem->getInfo(true, context->getSettingsRef().log_profile_events);
-        elem.type = QueryLogElementType::QUERY_FINISH;
+        std::shared_ptr<ProfileEvents::Counters::Snapshot> profile_counters;
+        QueryStatusInfo info = process_list_elem->getInfo(true, true);
+        if (context->getSettingsRef().log_profile_events)
+            profile_counters = info.profile_counters;
+        else
+            profile_counters.swap(info.profile_counters);

+        elem.type = QueryLogElementType::QUERY_FINISH;
        addStatusInfoToQueryLogElement(elem, info, query_ast, context);

        if (pulling_pipeline)
@ -419,6 +425,7 @@ void logQueryFinish(
        {
            Progress p;
            p.incrementPiecewiseAtomically(Progress{ResultProgress{elem.result_rows, elem.result_bytes}});
+            p.incrementRealTimeMicroseconds((*profile_counters)[ProfileEvents::RealTimeMicroseconds]);
            progress_callback(p);
        }

@ -471,6 +478,8 @@ void logQueryFinish(
                    processor_elem.parent_ids = std::move(parents);

                    processor_elem.plan_step = reinterpret_cast<std::uintptr_t>(processor->getQueryPlanStep());
+                    processor_elem.plan_step_name = processor->getPlanStepName();
+                    processor_elem.plan_step_description = processor->getPlanStepDescription();
                    processor_elem.plan_group = processor->getQueryPlanStepGroup();

                    processor_elem.processor_name = processor->getName();
@ -786,7 +795,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
            /// Verify that AST formatting is consistent:
            /// If you format AST, parse it back, and format it again, you get the same string.

-            String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true, false);
+            String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true, false, false, IdentifierQuotingStyle::Backticks);

            /// The query can become more verbose after formatting, so:
            size_t new_max_query_size = max_query_size > 0 ? (1000 + 2 * max_query_size) : 0;
@ -811,7 +820,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(

            chassert(ast2);

-            String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true, false);
+            String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true, false, false, IdentifierQuotingStyle::Backticks);

            if (formatted1 != formatted2)
                throw Exception(ErrorCodes::LOGICAL_ERROR,
--- a/src/Interpreters/formatWithPossiblyHidingSecrets.h
+++ b/src/Interpreters/formatWithPossiblyHidingSecrets.h
@ -26,7 +26,12 @@ inline String format(const SecretHidingFormatSettings & settings)
        && settings.ctx->getAccess()->isGranted(AccessType::displaySecretsInShowAndSelect);

    return settings.query.formatWithPossiblyHidingSensitiveData(
-        settings.max_length, settings.one_line, show_secrets, settings.ctx->getSettingsRef().print_pretty_type_names);
+        settings.max_length,
+        settings.one_line,
+        show_secrets,
+        settings.ctx->getSettingsRef().print_pretty_type_names,
+        settings.ctx->getSettingsRef().output_format_always_quote_identifiers,
+        settings.ctx->getSettingsRef().output_format_identifier_quoting_style);
 }

 }
--- a/Show More
+++ b/Show More