Merge branch 'master' into variant_inference

2024-11-26 09:32:01 +00:00 · 2024-08-09 23:37:27 +02:00 · 2024-08-09 23:37:27 +02:00 · 3e43cbe791
commit 3e43cbe791
parent 0ebe8e3551 5881f28718
496 changed files with 17737 additions and 5388 deletions
--- a/.github/workflows/jepsen.yml
+++ b/.github/workflows/jepsen.yml
@ -67,7 +67,7 @@ jobs:
        if: ${{ !cancelled() }}
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
-          cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
+          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
          ${{ toJson(needs) }}
          EOF
          python3 ./tests/ci/ci_buddy.py --check-wf-status
--- a/.gitmodules
+++ b/.gitmodules
@ -341,7 +341,7 @@
 	url = https://github.com/graphitemaster/incbin.git
 [submodule "contrib/usearch"]
 	path = contrib/usearch
-	url = https://github.com/unum-cloud/usearch.git
+	url = https://github.com/ClickHouse/usearch.git
 [submodule "contrib/SimSIMD"]
 	path = contrib/SimSIMD
 	url = https://github.com/ashvardanian/SimSIMD.git
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -428,12 +428,17 @@ if (NOT SANITIZE)
    set (CMAKE_POSITION_INDEPENDENT_CODE OFF)
 endif()

-if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X) AND NOT SANITIZE)
-    # Slightly more efficient code can be generated
-    # It's disabled for ARM because otherwise ClickHouse cannot run on Android.
+if (NOT OS_ANDROID AND OS_LINUX AND NOT ARCH_S390X AND NOT SANITIZE)
+    # Using '-no-pie' builds executables with fixed addresses, resulting in slightly more efficient code
+    # and keeping binary addresses constant even with ASLR enabled.
+    # Disabled on Android as it requires PIE: https://source.android.com/docs/security/enhancements#android-5
+    # Disabled on IBM S390X due to build issues with 'no-pie'
+    # Disabled with sanitizers to avoid issues with maximum relocation size: https://github.com/ClickHouse/ClickHouse/pull/49145
    set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie")
    set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie")
    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie -Wl,-no-pie")
+else ()
+    message (WARNING "ClickHouse is built as PIE, system.trace_log will contain invalid addresses after server restart.")
 endif ()

 if (ENABLE_TESTS)
--- a/base/poco/Foundation/include/Poco/ErrorHandler.h
+++ b/base/poco/Foundation/include/Poco/ErrorHandler.h
@ -21,6 +21,7 @@
 #include "Poco/Exception.h"
 #include "Poco/Foundation.h"
 #include "Poco/Mutex.h"
+#include "Poco/Message.h"


 namespace Poco
@ -78,6 +79,10 @@ public:
    ///
    /// The default implementation just breaks into the debugger.

+    virtual void logMessageImpl(Message::Priority priority, const std::string & msg) {}
+    /// Writes a message to the log; the default implementation does nothing.
+    /// Useful for logging from Poco.
+
    static void handle(const Exception & exc);
    /// Invokes the currently registered ErrorHandler.

@ -87,6 +92,9 @@ public:
    static void handle();
    /// Invokes the currently registered ErrorHandler.

+    static void logMessage(Message::Priority priority, const std::string & msg);
+    /// Invokes the currently registered ErrorHandler to log a message.
+
    static ErrorHandler * set(ErrorHandler * pHandler);
    /// Registers the given handler as the current error handler.
    ///
--- a/base/poco/Foundation/src/ErrorHandler.cpp
+++ b/base/poco/Foundation/src/ErrorHandler.cpp
@ -8,7 +8,7 @@
 // Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
 // and Contributors.
 //
-// SPDX-License-Identifier:	BSL-1.0
+// SPDX-License-Identifier:    BSL-1.0
 //


@ -35,79 +35,91 @@ ErrorHandler::~ErrorHandler()

 void ErrorHandler::exception(const Exception& exc)
 {
-	poco_debugger_msg(exc.what());
+    poco_debugger_msg(exc.what());
 }

-	
+    
 void ErrorHandler::exception(const std::exception& exc)
 {
-	poco_debugger_msg(exc.what());
+    poco_debugger_msg(exc.what());
 }


 void ErrorHandler::exception()
 {
-	poco_debugger_msg("unknown exception");
+    poco_debugger_msg("unknown exception");
 }


 void ErrorHandler::handle(const Exception& exc)
 {
-	FastMutex::ScopedLock lock(_mutex);
-	try
-	{
-		_pHandler->exception(exc);
-	}
-	catch (...)
-	{
-	}
+    FastMutex::ScopedLock lock(_mutex);
+    try
+    {
+        _pHandler->exception(exc);
+    }
+    catch (...)
+    {
+    }
 }

-	
+    
 void ErrorHandler::handle(const std::exception& exc)
 {
-	FastMutex::ScopedLock lock(_mutex);
-	try
-	{
-		_pHandler->exception(exc);
-	}
-	catch (...)
-	{
-	}
+    FastMutex::ScopedLock lock(_mutex);
+    try
+    {
+        _pHandler->exception(exc);
+    }
+    catch (...)
+    {
+    }
 }


 void ErrorHandler::handle()
 {
-	FastMutex::ScopedLock lock(_mutex);
-	try
-	{
-		_pHandler->exception();
-	}
-	catch (...)
-	{
-	}
+    FastMutex::ScopedLock lock(_mutex);
+    try
+    {
+        _pHandler->exception();
+    }
+    catch (...)
+    {
+    }
+}
+
+void ErrorHandler::logMessage(Message::Priority priority, const std::string & msg)
+{
+    FastMutex::ScopedLock lock(_mutex);
+    try
+    {
+        _pHandler->logMessageImpl(priority, msg);
+    }
+    catch (...)
+    {
+    }
 }


 ErrorHandler* ErrorHandler::set(ErrorHandler* pHandler)
 {
-	poco_check_ptr(pHandler);
+    poco_check_ptr(pHandler);

-	FastMutex::ScopedLock lock(_mutex);
-	ErrorHandler* pOld = _pHandler;
-	_pHandler = pHandler;
-	return pOld;
+    FastMutex::ScopedLock lock(_mutex);
+    ErrorHandler* pOld = _pHandler;
+    _pHandler = pHandler;
+    return pOld;
 }


 ErrorHandler* ErrorHandler::defaultHandler()
 {
-	// NOTE: Since this is called to initialize the static _pHandler
-	// variable, sh has to be a local static, otherwise we run
-	// into static initialization order issues.
-	static SingletonHolder<ErrorHandler> sh;
-	return sh.get();
+    // NOTE: Since this is called to initialize the static _pHandler
+    // variable, sh has to be a local static, otherwise we run
+    // into static initialization order issues.
+    static SingletonHolder<ErrorHandler> sh;
+    return sh.get();
 }


--- a/base/poco/Net/src/SocketImpl.cpp
+++ b/base/poco/Net/src/SocketImpl.cpp
@ -17,6 +17,7 @@
 #include "Poco/Net/StreamSocketImpl.h"
 #include "Poco/NumberFormatter.h"
 #include "Poco/Timestamp.h"
+#include "Poco/ErrorHandler.h"
 #include <string.h> // FD_SET needs memset on some platforms, so we can't use <cstring>


--- a/base/poco/Net/src/TCPServer.cpp
+++ b/base/poco/Net/src/TCPServer.cpp
@ -8,7 +8,7 @@
 // Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
 // and Contributors.
 //
-// SPDX-License-Identifier:	BSL-1.0
+// SPDX-License-Identifier:    BSL-1.0
 //


@ -44,190 +44,194 @@ TCPServerConnectionFilter::~TCPServerConnectionFilter()


 TCPServer::TCPServer(TCPServerConnectionFactory::Ptr pFactory, Poco::UInt16 portNumber, TCPServerParams::Ptr pParams):
-	_socket(ServerSocket(portNumber)),
-	_thread(threadName(_socket)),
-	_stopped(true)
-{	
-	Poco::ThreadPool& pool = Poco::ThreadPool::defaultPool();
-	if (pParams)
-	{
-		int toAdd = pParams->getMaxThreads() - pool.capacity();
-		if (toAdd > 0) pool.addCapacity(toAdd);
-	}
-	_pDispatcher = new TCPServerDispatcher(pFactory, pool, pParams);
-	
+    _socket(ServerSocket(portNumber)),
+    _thread(threadName(_socket)),
+    _stopped(true)
+{    
+    Poco::ThreadPool& pool = Poco::ThreadPool::defaultPool();
+    if (pParams)
+    {
+        int toAdd = pParams->getMaxThreads() - pool.capacity();
+        if (toAdd > 0) pool.addCapacity(toAdd);
+    }
+    _pDispatcher = new TCPServerDispatcher(pFactory, pool, pParams);
+    
 }


 TCPServer::TCPServer(TCPServerConnectionFactory::Ptr pFactory, const ServerSocket& socket, TCPServerParams::Ptr pParams):
-	_socket(socket),
-	_thread(threadName(socket)),
-	_stopped(true)
+    _socket(socket),
+    _thread(threadName(socket)),
+    _stopped(true)
 {
-	Poco::ThreadPool& pool = Poco::ThreadPool::defaultPool();
-	if (pParams)
-	{
-		int toAdd = pParams->getMaxThreads() - pool.capacity();
-		if (toAdd > 0) pool.addCapacity(toAdd);
-	}
-	_pDispatcher = new TCPServerDispatcher(pFactory, pool, pParams);
+    Poco::ThreadPool& pool = Poco::ThreadPool::defaultPool();
+    if (pParams)
+    {
+        int toAdd = pParams->getMaxThreads() - pool.capacity();
+        if (toAdd > 0) pool.addCapacity(toAdd);
+    }
+    _pDispatcher = new TCPServerDispatcher(pFactory, pool, pParams);
 }


 TCPServer::TCPServer(TCPServerConnectionFactory::Ptr pFactory, Poco::ThreadPool& threadPool, const ServerSocket& socket, TCPServerParams::Ptr pParams):
-	_socket(socket),
-	_pDispatcher(new TCPServerDispatcher(pFactory, threadPool, pParams)),
-	_thread(threadName(socket)),
-	_stopped(true)
+    _socket(socket),
+    _pDispatcher(new TCPServerDispatcher(pFactory, threadPool, pParams)),
+    _thread(threadName(socket)),
+    _stopped(true)
 {
 }


 TCPServer::~TCPServer()
 {
-	try
-	{
-		stop();
-		_pDispatcher->release();
-	}
-	catch (...)
-	{
-		poco_unexpected();
-	}
+    try
+    {
+        stop();
+        _pDispatcher->release();
+    }
+    catch (...)
+    {
+        poco_unexpected();
+    }
 }


 const TCPServerParams& TCPServer::params() const
 {
-	return _pDispatcher->params();
+    return _pDispatcher->params();
 }


 void TCPServer::start()
 {
-	poco_assert (_stopped);
+    poco_assert (_stopped);

-	_stopped = false;
-	_thread.start(*this);
+    _stopped = false;
+    _thread.start(*this);
 }

-	
+    
 void TCPServer::stop()
 {
-	if (!_stopped)
-	{
-		_stopped = true;
-		_thread.join();
-		_pDispatcher->stop();
-	}
+    if (!_stopped)
+    {
+        _stopped = true;
+        _thread.join();
+        _pDispatcher->stop();
+    }
 }


 void TCPServer::run()
 {
-	while (!_stopped)
-	{
-		Poco::Timespan timeout(250000);
-		try
-		{
-			if (_socket.poll(timeout, Socket::SELECT_READ))
-			{
-				try
-				{
-					StreamSocket ss = _socket.acceptConnection();
-					
-					if (!_pConnectionFilter || _pConnectionFilter->accept(ss))
-					{
-						// enable nodelay per default: OSX really needs that
+    while (!_stopped)
+    {
+        Poco::Timespan timeout(250000);
+        try
+        {
+            if (_socket.poll(timeout, Socket::SELECT_READ))
+            {
+                try
+                {
+                    StreamSocket ss = _socket.acceptConnection();
+                    
+                    if (!_pConnectionFilter || _pConnectionFilter->accept(ss))
+                    {
+                        // enable nodelay per default: OSX really needs that
 #if defined(POCO_OS_FAMILY_UNIX)
-						if (ss.address().family() != AddressFamily::UNIX_LOCAL)
+                        if (ss.address().family() != AddressFamily::UNIX_LOCAL)
 #endif
-						{
-							ss.setNoDelay(true);
-						}
-						_pDispatcher->enqueue(ss);
-					}
-				}
-				catch (Poco::Exception& exc)
-				{
-					ErrorHandler::handle(exc);
-				}
-				catch (std::exception& exc)
-				{
-					ErrorHandler::handle(exc);
-				}
-				catch (...)
-				{
-					ErrorHandler::handle();
-				}
-			}
-		}
-		catch (Poco::Exception& exc)
-		{
-			ErrorHandler::handle(exc);
-			// possibly a resource issue since poll() failed;
-			// give some time to recover before trying again
-			Poco::Thread::sleep(50); 
-		}
-	}
+                        {
+                            ss.setNoDelay(true);
+                        }
+                        _pDispatcher->enqueue(ss);
+                    }
+                    else
+                    {
+                        ErrorHandler::logMessage(Message::PRIO_WARNING, "Filtered out connection from " + ss.peerAddress().toString());
+                    }
+                }
+                catch (Poco::Exception& exc)
+                {
+                    ErrorHandler::handle(exc);
+                }
+                catch (std::exception& exc)
+                {
+                    ErrorHandler::handle(exc);
+                }
+                catch (...)
+                {
+                    ErrorHandler::handle();
+                }
+            }
+        }
+        catch (Poco::Exception& exc)
+        {
+            ErrorHandler::handle(exc);
+            // possibly a resource issue since poll() failed;
+            // give some time to recover before trying again
+            Poco::Thread::sleep(50); 
+        }
+    }
 }


 int TCPServer::currentThreads() const
 {
-	return _pDispatcher->currentThreads();
+    return _pDispatcher->currentThreads();
 }


 int TCPServer::maxThreads() const
 {
-	return _pDispatcher->maxThreads();
+    return _pDispatcher->maxThreads();
 }

-	
+    
 int TCPServer::totalConnections() const
 {
-	return _pDispatcher->totalConnections();
+    return _pDispatcher->totalConnections();
 }


 int TCPServer::currentConnections() const
 {
-	return _pDispatcher->currentConnections();
+    return _pDispatcher->currentConnections();
 }


 int TCPServer::maxConcurrentConnections() const
 {
-	return _pDispatcher->maxConcurrentConnections();
+    return _pDispatcher->maxConcurrentConnections();
 }

-	
+    
 int TCPServer::queuedConnections() const
 {
-	return _pDispatcher->queuedConnections();
+    return _pDispatcher->queuedConnections();
 }


 int TCPServer::refusedConnections() const
 {
-	return _pDispatcher->refusedConnections();
+    return _pDispatcher->refusedConnections();
 }


 void TCPServer::setConnectionFilter(const TCPServerConnectionFilter::Ptr& pConnectionFilter)
 {
-	poco_assert (_stopped);
+    poco_assert (_stopped);

-	_pConnectionFilter = pConnectionFilter;
+    _pConnectionFilter = pConnectionFilter;
 }


 std::string TCPServer::threadName(const ServerSocket& socket)
 {
-	std::string name("TCPServer: ");
-	name.append(socket.address().toString());
-	return name;
+    std::string name("TCPServer: ");
+    name.append(socket.address().toString());
+    return name;

 }

--- a/base/poco/Net/src/TCPServerDispatcher.cpp
+++ b/base/poco/Net/src/TCPServerDispatcher.cpp
@ -8,7 +8,7 @@
 // Copyright (c) 2005-2007, Applied Informatics Software Engineering GmbH.
 // and Contributors.
 //
-// SPDX-License-Identifier:	BSL-1.0
+// SPDX-License-Identifier:    BSL-1.0
 //


@ -33,44 +33,44 @@ namespace Net {
 class TCPConnectionNotification: public Notification
 {
 public:
-	TCPConnectionNotification(const StreamSocket& socket):
-		_socket(socket)
-	{
-	}
-	
-	~TCPConnectionNotification()
-	{
-	}
-	
-	const StreamSocket& socket() const
-	{
-		return _socket;
-	}
+    TCPConnectionNotification(const StreamSocket& socket):
+        _socket(socket)
+    {
+    }
+    
+    ~TCPConnectionNotification()
+    {
+    }
+    
+    const StreamSocket& socket() const
+    {
+        return _socket;
+    }

 private:
-	StreamSocket _socket;
+    StreamSocket _socket;
 };


 TCPServerDispatcher::TCPServerDispatcher(TCPServerConnectionFactory::Ptr pFactory, Poco::ThreadPool& threadPool, TCPServerParams::Ptr pParams):
-	_rc(1),
-	_pParams(pParams),
-	_currentThreads(0),
-	_totalConnections(0),
-	_currentConnections(0),
-	_maxConcurrentConnections(0),
-	_refusedConnections(0),
-	_stopped(false),
-	_pConnectionFactory(pFactory),
-	_threadPool(threadPool)
+    _rc(1),
+    _pParams(pParams),
+    _currentThreads(0),
+    _totalConnections(0),
+    _currentConnections(0),
+    _maxConcurrentConnections(0),
+    _refusedConnections(0),
+    _stopped(false),
+    _pConnectionFactory(pFactory),
+    _threadPool(threadPool)
 {
-	poco_check_ptr (pFactory);
+    poco_check_ptr (pFactory);

-	if (!_pParams)
-		_pParams = new TCPServerParams;
-	
-	if (_pParams->getMaxThreads() == 0)
-		_pParams->setMaxThreads(threadPool.capacity());
+    if (!_pParams)
+        _pParams = new TCPServerParams;
+    
+    if (_pParams->getMaxThreads() == 0)
+        _pParams->setMaxThreads(threadPool.capacity());
 }


@ -81,161 +81,184 @@ TCPServerDispatcher::~TCPServerDispatcher()

 void TCPServerDispatcher::duplicate()
 {
-	++_rc;
+    ++_rc;
 }


 void TCPServerDispatcher::release()
 {
-	if (--_rc == 0) delete this;
+    if (--_rc == 0) delete this;
 }


 void TCPServerDispatcher::run()
 {
-	AutoPtr<TCPServerDispatcher> guard(this); // ensure object stays alive
+    AutoPtr<TCPServerDispatcher> guard(this); // ensure object stays alive

-	int idleTime = (int) _pParams->getThreadIdleTime().totalMilliseconds();
+    int idleTime = (int) _pParams->getThreadIdleTime().totalMilliseconds();

-	for (;;)
-	{
-		try
-		{
-			AutoPtr<Notification> pNf = _queue.waitDequeueNotification(idleTime);
-			if (pNf && !_stopped)
-			{
-				TCPConnectionNotification* pCNf = dynamic_cast<TCPConnectionNotification*>(pNf.get());
-				if (pCNf)
-				{
-					beginConnection();
-					if (!_stopped)
-					{
-						std::unique_ptr<TCPServerConnection> pConnection(_pConnectionFactory->createConnection(pCNf->socket()));
-						poco_check_ptr(pConnection.get());
-						pConnection->start();
-					}
-					/// endConnection() should be called after destroying TCPServerConnection,
-					/// otherwise currentConnections() could become zero while some connections are yet still alive.
-					endConnection();
-				}
-			}
-		}
-		catch (Poco::Exception &exc) { ErrorHandler::handle(exc); }
-		catch (std::exception &exc)  { ErrorHandler::handle(exc); }
-		catch (...)                  { ErrorHandler::handle();    }
-		FastMutex::ScopedLock lock(_mutex);
-		if (_stopped || (_currentThreads > 1 && _queue.empty()))
-		{
-			--_currentThreads;
-			break;
-		}
-	}
+    for (;;)
+    {
+        try
+        {
+            AutoPtr<Notification> pNf = _queue.waitDequeueNotification(idleTime);
+            if (pNf && !_stopped)
+            {
+                TCPConnectionNotification* pCNf = dynamic_cast<TCPConnectionNotification*>(pNf.get());
+                if (pCNf)
+                {
+                    beginConnection();
+                    if (!_stopped)
+                    {
+                        std::unique_ptr<TCPServerConnection> pConnection(_pConnectionFactory->createConnection(pCNf->socket()));
+                        poco_check_ptr(pConnection.get());
+                        pConnection->start();
+                    }
+                    /// endConnection() should be called after destroying TCPServerConnection,
+                    /// otherwise currentConnections() could become zero while some connections are yet still alive.
+                    endConnection();
+                }
+            }
+        }
+        catch (Poco::Exception &exc) { ErrorHandler::handle(exc); }
+        catch (std::exception &exc)  { ErrorHandler::handle(exc); }
+        catch (...)                  { ErrorHandler::handle();    }
+        FastMutex::ScopedLock lock(_mutex);
+        if (_stopped || (_currentThreads > 1 && _queue.empty()))
+        {
+            --_currentThreads;
+            break;
+        }
+    }
 }


 namespace
 {
-	static const std::string threadName("TCPServerConnection");
+    static const std::string threadName("TCPServerConnection");
 }

-	
+    
 void TCPServerDispatcher::enqueue(const StreamSocket& socket)
 {
-	FastMutex::ScopedLock lock(_mutex);
+    FastMutex::ScopedLock lock(_mutex);

-	if (_queue.size() < _pParams->getMaxQueued())
-	{
-		if (!_queue.hasIdleThreads() && _currentThreads < _pParams->getMaxThreads())
-		{
-			try
-			{
+    ErrorHandler::logMessage(Message::PRIO_TEST, "Queue size: " + std::to_string(_queue.size()) +
+                                 ", current threads: " + std::to_string(_currentThreads) +
+                                 ", threads in pool: " + std::to_string(_threadPool.allocated()) +
+                                 ", current connections: " + std::to_string(_currentConnections));
+
+
+    if (_queue.size() < _pParams->getMaxQueued())
+    {
+        /// NOTE: the condition below is wrong.
+        /// Since the thread pool is shared between multiple servers/TCPServerDispatchers,
+        /// _currentThreads < _pParams->getMaxThreads() will be true when the pool is actually saturated.
+        /// As a result, the queue is useless and connections never wait in the queue.
+        /// Instead, we (mistakenly) think that we can create a thread for this connection, but we fail to create it
+        /// and the connection gets rejected.
+        /// We could check _currentThreads < _threadPool.allocated() to make it work,
+        /// but it's not clear if we want to make it work
+        /// because it may be better to reject connection immediately if we don't have resources to handle it.
+        if (!_queue.hasIdleThreads() && _currentThreads < _pParams->getMaxThreads())
+        {
+            try
+            {
                this->duplicate();
-				_threadPool.startWithPriority(_pParams->getThreadPriority(), *this, threadName);
-				++_currentThreads;
-			}
-			catch (Poco::Exception& exc)
-			{
+                _threadPool.startWithPriority(_pParams->getThreadPriority(), *this, threadName);
+                ++_currentThreads;
+            }
+            catch (Poco::Exception& exc)
+            {
+                ErrorHandler::logMessage(Message::PRIO_WARNING, "Got an exception while starting thread for connection from " +
+                                             socket.peerAddress().toString());
+                ErrorHandler::handle(exc);
                this->release();
-				++_refusedConnections;
-				std::cerr << "Got exception while starting thread for connection. Error code: "
-						  << exc.code() << ", message: '" << exc.displayText() << "'" << std::endl;
-				return;
-			}
-		}
-		_queue.enqueueNotification(new TCPConnectionNotification(socket));
-	}
-	else
-	{
-		++_refusedConnections;
-	}
+                ++_refusedConnections;
+                return;
+            }
+        }
+        else if (!_queue.hasIdleThreads())
+        {
+            ErrorHandler::logMessage(Message::PRIO_TRACE, "Don't have idle threads, adding connection from " +
+                                         socket.peerAddress().toString() + " to the queue, size: " + std::to_string(_queue.size()));
+        }
+        _queue.enqueueNotification(new TCPConnectionNotification(socket));
+    }
+    else
+    {
+        ErrorHandler::logMessage(Message::PRIO_WARNING, "Refusing connection from " + socket.peerAddress().toString() +
+                                     ", reached max queue size " + std::to_string(_pParams->getMaxQueued()));
+        ++_refusedConnections;
+    }
 }


 void TCPServerDispatcher::stop()
 {
-	_stopped = true;
-	_queue.clear();
-	_queue.wakeUpAll();
+    _stopped = true;
+    _queue.clear();
+    _queue.wakeUpAll();
 }


 int TCPServerDispatcher::currentThreads() const
 {
-	return _currentThreads;
+    return _currentThreads;
 }

 int TCPServerDispatcher::maxThreads() const
 {
-	FastMutex::ScopedLock lock(_mutex);
-	
-	return _threadPool.capacity();
+    FastMutex::ScopedLock lock(_mutex);
+    
+    return _threadPool.capacity();
 }


 int TCPServerDispatcher::totalConnections() const
 {
-	return _totalConnections;
+    return _totalConnections;
 }


 int TCPServerDispatcher::currentConnections() const
 {
-	return _currentConnections;
+    return _currentConnections;
 }


 int TCPServerDispatcher::maxConcurrentConnections() const
 {
-	return _maxConcurrentConnections;
+    return _maxConcurrentConnections;
 }


 int TCPServerDispatcher::queuedConnections() const
 {
-	return _queue.size();
+    return _queue.size();
 }


 int TCPServerDispatcher::refusedConnections() const
 {
-	return _refusedConnections;
+    return _refusedConnections;
 }


 void TCPServerDispatcher::beginConnection()
 {
-	FastMutex::ScopedLock lock(_mutex);
+    FastMutex::ScopedLock lock(_mutex);

-	++_totalConnections;
-	++_currentConnections;
-	if (_currentConnections > _maxConcurrentConnections)
-		_maxConcurrentConnections.store(_currentConnections);
+    ++_totalConnections;
+    ++_currentConnections;
+    if (_currentConnections > _maxConcurrentConnections)
+        _maxConcurrentConnections.store(_currentConnections);
 }


 void TCPServerDispatcher::endConnection()
 {
-	--_currentConnections;
+    --_currentConnections;
 }


--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@ -71,7 +71,6 @@ add_contrib (zlib-ng-cmake zlib-ng)
 add_contrib (bzip2-cmake bzip2)
 add_contrib (minizip-ng-cmake minizip-ng)
 add_contrib (snappy-cmake snappy)
-add_contrib (rocksdb-cmake rocksdb)
 add_contrib (thrift-cmake thrift)
 # parquet/arrow/orc
 add_contrib (arrow-cmake arrow) # requires: snappy, thrift, double-conversion
@ -148,6 +147,7 @@ add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift, avro, arro
 add_contrib (cppkafka-cmake cppkafka)
 add_contrib (libpqxx-cmake libpqxx)
 add_contrib (libpq-cmake libpq)
+add_contrib (rocksdb-cmake rocksdb) # requires: jemalloc, snappy, zlib, lz4, zstd, liburing
 add_contrib (nuraft-cmake NuRaft)
 add_contrib (fast_float-cmake fast_float)
 add_contrib (idna-cmake idna)
--- a/contrib/librdkafka
+++ b/contrib/librdkafka
@ -1 +1 @@
-Subproject commit 2d2aab6f5b79db1cfca15d7bf0dee75d00d82082
+Subproject commit 39d4ed49ccf3406e2bf825d5d7b0903b5a290782
--- a/contrib/qpl
+++ b/contrib/qpl
@ -1 +1 @@
-Subproject commit d4715e0e79896b85612158e135ee1a85f3b3e04d
+Subproject commit c2ced94c53c1ee22191201a59878e9280bc9b9b8
--- a/contrib/qpl-cmake/CMakeLists.txt
+++ b/contrib/qpl-cmake/CMakeLists.txt
@ -4,7 +4,6 @@ set (QPL_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl")
 set (QPL_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl/sources")
 set (QPL_BINARY_DIR "${ClickHouse_BINARY_DIR}/build/contrib/qpl")
 set (EFFICIENT_WAIT OFF)
-set (BLOCK_ON_FAULT ON)
 set (LOG_HW_INIT OFF)
 set (SANITIZE_MEMORY OFF)
 set (SANITIZE_THREADS OFF)
@ -16,16 +15,20 @@ function(GetLibraryVersion _content _outputVar)
    SET(${_outputVar} ${CMAKE_MATCH_1} PARENT_SCOPE)
 endfunction()

-set (QPL_VERSION 1.2.0)
+set (QPL_VERSION 1.6.0)

 message(STATUS "Intel QPL version: ${QPL_VERSION}")

-# There are 5 source subdirectories under $QPL_SRC_DIR: isal, c_api, core-sw, middle-layer, c_api.
-# Generate 8 library targets: middle_layer_lib, isal, isal_asm, qplcore_px, qplcore_avx512, qplcore_sw_dispatcher, core_iaa, middle_layer_lib.
+# There are 5 source subdirectories under $QPL_SRC_DIR: c_api, core-iaa, core-sw, middle-layer and isal.
+# Generate 8 library targets: qpl_c_api, core_iaa, qplcore_px, qplcore_avx512, qplcore_sw_dispatcher, middle_layer_lib, isal and isal_asm,
+# which are then combined into static or shared qpl.
 # Output ch_contrib::qpl by linking with 8 library targets.

-# The qpl submodule comes with its own version of isal. It contains code which does not exist in upstream isal. It would be nice to link
-# only upstream isal (ch_contrib::isal) but at this point we can't.
+# Note, QPL has integrated a customized version of ISA-L to meet specific needs.
+# This version has been significantly modified and there are no plans to maintain compatibility with the upstream version
+# or upgrade the current copy.
+
+## cmake/CompileOptions.cmake and automatic wrappers generation

 # ==========================================================================
 # Copyright (C) 2022 Intel Corporation
@ -442,6 +445,7 @@ function(generate_unpack_kernel_arrays current_directory PLATFORMS_LIST)
    endforeach()
 endfunction()

+# [SUBDIR]isal

 enable_language(ASM_NASM)

@ -479,7 +483,6 @@ set(ISAL_ASM_SRC ${QPL_SRC_DIR}/isal/igzip/igzip_body.asm
                 ${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_04.asm
                 ${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_06.asm
                 ${QPL_SRC_DIR}/isal/igzip/igzip_multibinary.asm
-                 ${QPL_SRC_DIR}/isal/igzip/stdmac.asm
                 ${QPL_SRC_DIR}/isal/crc/crc_multibinary.asm
                 ${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8.asm
                 ${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8_02.asm
@ -505,7 +508,6 @@ set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
 # Setting external and internal interfaces for ISA-L library
 target_include_directories(isal
                        PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/isal/include>
-                        PRIVATE ${QPL_SRC_DIR}/isal/include
                        PUBLIC ${QPL_SRC_DIR}/isal/igzip)

 set_target_properties(isal PROPERTIES
@ -617,12 +619,9 @@ target_compile_options(qplcore_sw_dispatcher

 # [SUBDIR]core-iaa
 file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c
-                      ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.cpp
                      ${QPL_SRC_DIR}/core-iaa/sources/driver_loader/*.c
-                      ${QPL_SRC_DIR}/core-iaa/sources/driver_loader/*.cpp
                      ${QPL_SRC_DIR}/core-iaa/sources/descriptors/*.c
-                      ${QPL_SRC_DIR}/core-iaa/sources/descriptors/*.cpp
-                      ${QPL_SRC_DIR}/core-iaa/sources/bit_rev.c)
+                      ${QPL_SRC_DIR}/core-iaa/sources/*.c)

 # Create library
 add_library(core_iaa OBJECT ${HW_PATH_SRC})
@ -634,31 +633,27 @@ target_include_directories(core_iaa
        PRIVATE ${UUID_DIR}
        PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/include>
        PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/sources/include>
-        PRIVATE $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include>  # status.h in own_checkers.h
-        PRIVATE $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/sources/c_api> # own_checkers.h
+        PRIVATE $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include> # status.h in own_checkers.h
+        PRIVATE $<TARGET_PROPERTY:qpl_c_api,INTERFACE_INCLUDE_DIRECTORIES> # for own_checkers.h
        PRIVATE $<TARGET_PROPERTY:qplcore_sw_dispatcher,INTERFACE_INCLUDE_DIRECTORIES>)

 target_compile_features(core_iaa PRIVATE c_std_11)

 target_compile_definitions(core_iaa PRIVATE QPL_BADARG_CHECK
-        PRIVATE $<$<BOOL:${BLOCK_ON_FAULT}>: BLOCK_ON_FAULT_ENABLED>
        PRIVATE $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>
        PRIVATE $<$<BOOL:${DYNAMIC_LOADING_LIBACCEL_CONFIG}>:DYNAMIC_LOADING_LIBACCEL_CONFIG>)

 # [SUBDIR]middle-layer
 file(GLOB MIDDLE_LAYER_SRC
-        ${QPL_SRC_DIR}/middle-layer/analytics/*.cpp
-        ${QPL_SRC_DIR}/middle-layer/c_wrapper/*.cpp
-        ${QPL_SRC_DIR}/middle-layer/checksum/*.cpp
+        ${QPL_SRC_DIR}/middle-layer/accelerator/*.cpp
+	    ${QPL_SRC_DIR}/middle-layer/analytics/*.cpp
        ${QPL_SRC_DIR}/middle-layer/common/*.cpp
        ${QPL_SRC_DIR}/middle-layer/compression/*.cpp
        ${QPL_SRC_DIR}/middle-layer/compression/*/*.cpp
        ${QPL_SRC_DIR}/middle-layer/compression/*/*/*.cpp
        ${QPL_SRC_DIR}/middle-layer/dispatcher/*.cpp
        ${QPL_SRC_DIR}/middle-layer/other/*.cpp
-        ${QPL_SRC_DIR}/middle-layer/util/*.cpp
-        ${QPL_SRC_DIR}/middle-layer/inflate/*.cpp
-        ${QPL_SRC_DIR}/core-iaa/sources/accelerator/*.cpp) # todo
+        ${QPL_SRC_DIR}/middle-layer/util/*.cpp)

 add_library(middle_layer_lib OBJECT
        ${MIDDLE_LAYER_SRC})
@ -667,6 +662,7 @@ set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
        $<TARGET_OBJECTS:middle_layer_lib>)

 target_compile_options(middle_layer_lib
+        PRIVATE $<$<C_COMPILER_ID:GNU,Clang>:$<$<CONFIG:Release>:-O3;-U_FORTIFY_SOURCE;-D_FORTIFY_SOURCE=2>>
        PRIVATE ${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS})

 target_compile_definitions(middle_layer_lib
@ -682,6 +678,7 @@ target_include_directories(middle_layer_lib
        PRIVATE ${UUID_DIR}
        PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/middle-layer>
        PUBLIC $<TARGET_PROPERTY:_qpl,INTERFACE_INCLUDE_DIRECTORIES>
+        PRIVATE $<TARGET_PROPERTY:qpl_c_api,INTERFACE_INCLUDE_DIRECTORIES>
        PUBLIC $<TARGET_PROPERTY:qplcore_sw_dispatcher,INTERFACE_INCLUDE_DIRECTORIES>
        PUBLIC $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>
        PUBLIC $<TARGET_PROPERTY:core_iaa,INTERFACE_INCLUDE_DIRECTORIES>)
@ -689,31 +686,54 @@ target_include_directories(middle_layer_lib
 target_compile_definitions(middle_layer_lib PUBLIC -DQPL_LIB)

 # [SUBDIR]c_api
-file(GLOB_RECURSE QPL_C_API_SRC
-        ${QPL_SRC_DIR}/c_api/*.c
-        ${QPL_SRC_DIR}/c_api/*.cpp)
+file(GLOB QPL_C_API_SRC
+        ${QPL_SRC_DIR}/c_api/compression_operations/*.c
+        ${QPL_SRC_DIR}/c_api/compression_operations/*.cpp
+	    ${QPL_SRC_DIR}/c_api/filter_operations/*.cpp
+	    ${QPL_SRC_DIR}/c_api/legacy_hw_path/*.c
+	    ${QPL_SRC_DIR}/c_api/legacy_hw_path/*.cpp
+	    ${QPL_SRC_DIR}/c_api/other_operations/*.cpp
+	    ${QPL_SRC_DIR}/c_api/serialization/*.cpp
+	    ${QPL_SRC_DIR}/c_api/*.cpp)
+
+add_library(qpl_c_api OBJECT ${QPL_C_API_SRC})
+
+target_include_directories(qpl_c_api
+	    PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/c_api/>
+        PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/include/> $<INSTALL_INTERFACE:include>
+        PRIVATE $<TARGET_PROPERTY:middle_layer_lib,INTERFACE_INCLUDE_DIRECTORIES>)
+
+# set_target_properties() does not evaluate generator expressions, so a
+# `$<...>` wrapper around a property name only creates a bogus property.
+# Set both language standards directly instead.
+set_target_properties(qpl_c_api PROPERTIES
+	C_STANDARD 17
+	CXX_STANDARD 17)
+
+target_compile_options(qpl_c_api
+        PRIVATE $<$<C_COMPILER_ID:GNU,Clang>:$<$<CONFIG:Release>:-O3;-U_FORTIFY_SOURCE;-D_FORTIFY_SOURCE=2>>
+	    PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>)
+
+target_compile_definitions(qpl_c_api
+        PUBLIC -DQPL_BADARG_CHECK # own_checkers.h
+        PUBLIC -DQPL_LIB          # needed for middle_layer_lib
+        PUBLIC $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>) # needed for middle_layer_lib
+
+set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
+        $<TARGET_OBJECTS:qpl_c_api>)
+
+# Final _qpl target

 get_property(LIB_DEPS GLOBAL PROPERTY QPL_LIB_DEPS)

-add_library(_qpl STATIC ${QPL_C_API_SRC} ${LIB_DEPS})
+add_library(_qpl STATIC ${LIB_DEPS})

 target_include_directories(_qpl
-        PUBLIC $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include/> $<INSTALL_INTERFACE:include>
-        PRIVATE $<TARGET_PROPERTY:middle_layer_lib,INTERFACE_INCLUDE_DIRECTORIES>
-        PRIVATE $<BUILD_INTERFACE:${QPL_SRC_DIR}/c_api>)
+        PUBLIC $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include/> $<INSTALL_INTERFACE:include>)

-target_compile_options(_qpl
-        PRIVATE ${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS})

 target_compile_definitions(_qpl
-        PRIVATE -DQPL_LIB
-        PRIVATE -DQPL_BADARG_CHECK
-        PRIVATE $<$<BOOL:${DYNAMIC_LOADING_LIBACCEL_CONFIG}>:DYNAMIC_LOADING_LIBACCEL_CONFIG>
        PUBLIC -DENABLE_QPL_COMPRESSION)

 target_link_libraries(_qpl
-        PRIVATE ch_contrib::accel-config
-        PRIVATE ch_contrib::isal)
+        PRIVATE ch_contrib::accel-config)

 target_include_directories(_qpl SYSTEM BEFORE
        PUBLIC "${QPL_PROJECT_DIR}/include"
--- a/contrib/rocksdb
+++ b/contrib/rocksdb
@ -1 +1 @@
-Subproject commit 49ce8a1064dd1ad89117899839bf136365e49e79
+Subproject commit 5f003e4a22d2e48e37c98d9620241237cd30dd24
--- a/contrib/rocksdb-cmake/CMakeLists.txt
+++ b/contrib/rocksdb-cmake/CMakeLists.txt
@ -5,36 +5,38 @@ if (NOT ENABLE_ROCKSDB OR NO_SSE3_OR_HIGHER) # assumes SSE4.2 and PCLMUL
  return()
 endif()

-# not in original build system, otherwise xxHash.cc fails to compile with ClickHouse C++23 default
-set (CMAKE_CXX_STANDARD 20)
-
-# Always disable jemalloc for rocksdb by default because it introduces non-standard jemalloc APIs
-option(WITH_JEMALLOC "build with JeMalloc" OFF)
-
-option(WITH_LIBURING "build with liburing" OFF) # TODO could try to enable this conditionally, depending on ClickHouse's ENABLE_LIBURING
-
 # ClickHouse cannot be compiled without snappy, lz4, zlib, zstd
 option(WITH_SNAPPY "build with SNAPPY" ON)
 option(WITH_LZ4 "build with lz4" ON)
 option(WITH_ZLIB "build with zlib" ON)
 option(WITH_ZSTD "build with zstd" ON)

-if(WITH_SNAPPY)
+if (ENABLE_JEMALLOC AND OS_LINUX) # gives compile errors with jemalloc enabled for rocksdb on non-Linux
+  add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE)
+  list (APPEND THIRDPARTY_LIBS ch_contrib::jemalloc)
+endif ()
+
+if (ENABLE_LIBURING)
+  add_definitions(-DROCKSDB_IOURING_PRESENT)
+  list (APPEND THIRDPARTY_LIBS ch_contrib::liburing)
+endif ()
+
+if (WITH_SNAPPY)
  add_definitions(-DSNAPPY)
  list(APPEND THIRDPARTY_LIBS ch_contrib::snappy)
 endif()

-if(WITH_ZLIB)
+if (WITH_ZLIB)
  add_definitions(-DZLIB)
  list(APPEND THIRDPARTY_LIBS ch_contrib::zlib)
 endif()

-if(WITH_LZ4)
+if (WITH_LZ4)
  add_definitions(-DLZ4)
  list(APPEND THIRDPARTY_LIBS ch_contrib::lz4)
 endif()

-if(WITH_ZSTD)
+if (WITH_ZSTD)
  add_definitions(-DZSTD)
  list(APPEND THIRDPARTY_LIBS ch_contrib::zstd)
 endif()
@ -88,6 +90,7 @@ set(SOURCES
    ${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc
    ${ROCKSDB_SOURCE_DIR}/cache/tiered_secondary_cache.cc
    ${ROCKSDB_SOURCE_DIR}/db/arena_wrapped_db_iter.cc
+    ${ROCKSDB_SOURCE_DIR}/db/attribute_group_iterator_impl.cc
    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_contents.cc
    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_fetcher.cc
    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_addition.cc
@ -104,6 +107,7 @@ set(SOURCES
    ${ROCKSDB_SOURCE_DIR}/db/blob/prefetch_buffer_collection.cc
    ${ROCKSDB_SOURCE_DIR}/db/builder.cc
    ${ROCKSDB_SOURCE_DIR}/db/c.cc
+    ${ROCKSDB_SOURCE_DIR}/db/coalescing_iterator.cc
    ${ROCKSDB_SOURCE_DIR}/db/column_family.cc
    ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction.cc
    ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_iterator.cc
@ -124,6 +128,7 @@ set(SOURCES
    ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_write.cc
    ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_compaction_flush.cc
    ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_files.cc
+    ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_follower.cc
    ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_open.cc
    ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_debug.cc
    ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_experimental.cc
@ -181,6 +186,7 @@ set(SOURCES
    ${ROCKSDB_SOURCE_DIR}/env/env_encryption.cc
    ${ROCKSDB_SOURCE_DIR}/env/file_system.cc
    ${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc
+    ${ROCKSDB_SOURCE_DIR}/env/fs_on_demand.cc
    ${ROCKSDB_SOURCE_DIR}/env/fs_remap.cc
    ${ROCKSDB_SOURCE_DIR}/env/mock_env.cc
    ${ROCKSDB_SOURCE_DIR}/env/unique_id_gen.cc
@ -368,6 +374,7 @@ set(SOURCES
    ${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/volatile_tier_impl.cc
    ${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/cache_simulator.cc
    ${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc
+    ${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_for_tiering_collector.cc
    ${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc
    ${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc
    ${ROCKSDB_SOURCE_DIR}/utilities/trace/replayer_impl.cc
@ -388,6 +395,7 @@ set(SOURCES
    ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_prepared_txn_db.cc
    ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc
    ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc
+    ${ROCKSDB_SOURCE_DIR}/utilities/types_util.cc
    ${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc
    ${ROCKSDB_SOURCE_DIR}/utilities/wal_filter.cc
    ${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc
@ -418,14 +426,18 @@ if(HAS_ARMV8_CRC)
 endif(HAS_ARMV8_CRC)

 list(APPEND SOURCES
-    "${ROCKSDB_SOURCE_DIR}/port/port_posix.cc"
-    "${ROCKSDB_SOURCE_DIR}/env/env_posix.cc"
-    "${ROCKSDB_SOURCE_DIR}/env/fs_posix.cc"
-    "${ROCKSDB_SOURCE_DIR}/env/io_posix.cc")
+    ${ROCKSDB_SOURCE_DIR}/port/port_posix.cc
+    ${ROCKSDB_SOURCE_DIR}/env/env_posix.cc
+    ${ROCKSDB_SOURCE_DIR}/env/fs_posix.cc
+    ${ROCKSDB_SOURCE_DIR}/env/io_posix.cc)

 add_library(_rocksdb ${SOURCES})
 add_library(ch_contrib::rocksdb ALIAS _rocksdb)
 target_link_libraries(_rocksdb PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})

+# Not in the native build system but useful anyway:
+# Make all functions in xxHash.h inline. Beneficial for performance: https://github.com/Cyan4973/xxHash/tree/v0.8.2#build-modifiers
+target_compile_definitions (_rocksdb PRIVATE XXH_INLINE_ALL)
+
 # SYSTEM is required to overcome some issues
 target_include_directories(_rocksdb SYSTEM BEFORE INTERFACE "${ROCKSDB_SOURCE_DIR}/include")
--- a/contrib/usearch
+++ b/contrib/usearch
@ -1 +1 @@
-Subproject commit 955c6f9c11adfd89c912e0d1643d160b4e9e543f
+Subproject commit 30810452bec5d3d3aa0931bb5d761e2f09aa6356
--- a/contrib/zlib-ng
+++ b/contrib/zlib-ng
@ -1 +1 @@
-Subproject commit 50f0eae1a411764cd6d1e85b3ce471438acd3c1c
+Subproject commit a2fbeffdc30a8b0ce6d54ee31208e2688eac4c9f
--- a/contrib/zlib-ng-cmake/CMakeLists.txt
+++ b/contrib/zlib-ng-cmake/CMakeLists.txt
@ -14,6 +14,8 @@ add_definitions(-DHAVE_VISIBILITY_HIDDEN)
 add_definitions(-DHAVE_VISIBILITY_INTERNAL)
 add_definitions(-DHAVE_BUILTIN_CTZ)
 add_definitions(-DHAVE_BUILTIN_CTZLL)
+add_definitions(-DHAVE_ATTRIBUTE_ALIGNED)
+add_definitions(-DHAVE_POSIX_MEMALIGN)

 set(ZLIB_ARCH_SRCS)
 set(ZLIB_ARCH_HDRS)
@ -24,67 +26,74 @@ if(ARCH_AARCH64)
    set(ARCHDIR "${SOURCE_DIR}/arch/arm")

    add_definitions(-DARM_FEATURES)
+    add_definitions(-DHAVE_SYS_AUXV_H)
    add_definitions(-DARM_AUXV_HAS_CRC32 -DARM_ASM_HWCAP)
    add_definitions(-DARM_AUXV_HAS_NEON)
-    add_definitions(-DARM_ACLE_CRC_HASH)
-    add_definitions(-DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH)
+    add_definitions(-DARM_ACLE)
+    add_definitions(-DHAVE_ARM_ACLE_H)
+    add_definitions(-DARM_NEON)
+    add_definitions(-DARM_NEON_HASLD4)

-    list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm.h)
-    list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/armfeature.c)
+    list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h)
+    list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c)
    set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c)
    list(APPEND ZLIB_ARCH_SRCS ${ACLE_SRCS})
-    set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c ${ARCHDIR}/slide_neon.c)
+    set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c
+                    ${ARCHDIR}/compare256_neon.c ${ARCHDIR}/slide_hash_neon.c)
    list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS})

 elseif(ARCH_PPC64LE)
    set(ARCHDIR "${SOURCE_DIR}/arch/power")

-    add_definitions(-DPOWER8)
    add_definitions(-DPOWER_FEATURES)
-    add_definitions(-DPOWER8_VSX_ADLER32)
-    add_definitions(-DPOWER8_VSX_SLIDEHASH)
+    add_definitions(-DHAVE_SYS_AUXV_H)

-    list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power.h)
-    list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power.c)
-    set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/slide_hash_power8.c)
+    if(POWER9)
+        add_definitions(-DPOWER9)
+    else()
+        add_definitions(-DPOWER8)
+        add_definitions(-DPOWER8_VSX)
+        add_definitions(-DPOWER8_VSX_CRC32)
+    endif()
+
+    list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h)
+    list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power_features.c)
+    set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/chunkset_power8.c ${ARCHDIR}/slide_hash_power8.c)
+    list(APPEND POWER8_SRCS ${ARCHDIR}/crc32_power8.c)
    list(APPEND ZLIB_ARCH_SRCS ${POWER8_SRCS})

 elseif(ARCH_AMD64)
    set(ARCHDIR "${SOURCE_DIR}/arch/x86")

    add_definitions(-DX86_FEATURES)
-    list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86.h)
-    list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86.c)
+    list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h)
+    list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c)
    if(ENABLE_AVX2)
-        add_definitions(-DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET)
-        set(AVX2_SRCS ${ARCHDIR}/slide_avx.c)
-        list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx.c)
-        list(APPEND AVX2_SRCS ${ARCHDIR}/compare258_avx.c)
-        list(APPEND AVX2_SRCS ${ARCHDIR}/adler32_avx.c)
+        add_definitions(-DX86_AVX2)
+        set(AVX2_SRCS ${ARCHDIR}/slide_hash_avx2.c)
+        list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx2.c)
+        list(APPEND AVX2_SRCS ${ARCHDIR}/compare256_avx2.c)
+        list(APPEND AVX2_SRCS ${ARCHDIR}/adler32_avx2.c)
        list(APPEND ZLIB_ARCH_SRCS ${AVX2_SRCS})
    endif()
    if(ENABLE_SSE42)
-        add_definitions(-DX86_SSE42_CRC_HASH)
-        set(SSE42_SRCS ${ARCHDIR}/insert_string_sse.c)
-        list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
-        add_definitions(-DX86_SSE42_CRC_INTRIN)
-        add_definitions(-DX86_SSE42_CMP_STR)
-        set(SSE42_SRCS ${ARCHDIR}/compare258_sse.c)
+        add_definitions(-DX86_SSE42)
+        set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c)
        list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
    endif()
    if(ENABLE_SSSE3)
-        add_definitions(-DX86_SSSE3 -DX86_SSSE3_ADLER32)
-        set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c)
+        add_definitions(-DX86_SSSE3)
+        set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/chunkset_ssse3.c)
        list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
    endif()
    if(ENABLE_PCLMULQDQ)
        add_definitions(-DX86_PCLMULQDQ_CRC)
-        set(PCLMULQDQ_SRCS ${ARCHDIR}/crc_folding.c)
+        set(PCLMULQDQ_SRCS ${ARCHDIR}/crc32_pclmulqdq.c)
        list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS})
    endif()

-    add_definitions(-DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE2_SLIDEHASH)
-    set(SSE2_SRCS ${ARCHDIR}/chunkset_sse.c ${ARCHDIR}/slide_sse.c)
+    add_definitions(-DX86_SSE2)
+    set(SSE2_SRCS ${ARCHDIR}/chunkset_sse2.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c)
    list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
    add_definitions(-DX86_NOCHECK_SSE2)
 endif ()
@ -106,39 +115,45 @@ generate_cmakein(${SOURCE_DIR}/zconf.h.in ${CMAKE_CURRENT_BINARY_DIR}/zconf.h.cm

 set(ZLIB_SRCS
    ${SOURCE_DIR}/adler32.c
+    ${SOURCE_DIR}/adler32_fold.c
    ${SOURCE_DIR}/chunkset.c
-    ${SOURCE_DIR}/compare258.c
+    ${SOURCE_DIR}/compare256.c
    ${SOURCE_DIR}/compress.c
-    ${SOURCE_DIR}/crc32.c
-    ${SOURCE_DIR}/crc32_comb.c
+    ${SOURCE_DIR}/cpu_features.c
+    ${SOURCE_DIR}/crc32_braid.c
+    ${SOURCE_DIR}/crc32_braid_comb.c
+    ${SOURCE_DIR}/crc32_fold.c
    ${SOURCE_DIR}/deflate.c
    ${SOURCE_DIR}/deflate_fast.c
+    ${SOURCE_DIR}/deflate_huff.c
    ${SOURCE_DIR}/deflate_medium.c
    ${SOURCE_DIR}/deflate_quick.c
+    ${SOURCE_DIR}/deflate_rle.c
    ${SOURCE_DIR}/deflate_slow.c
+    ${SOURCE_DIR}/deflate_stored.c
    ${SOURCE_DIR}/functable.c
    ${SOURCE_DIR}/infback.c
-    ${SOURCE_DIR}/inffast.c
    ${SOURCE_DIR}/inflate.c
    ${SOURCE_DIR}/inftrees.c
    ${SOURCE_DIR}/insert_string.c
+    ${SOURCE_DIR}/insert_string_roll.c
+    ${SOURCE_DIR}/slide_hash.c
    ${SOURCE_DIR}/trees.c
    ${SOURCE_DIR}/uncompr.c
    ${SOURCE_DIR}/zutil.c
+)
+
+set(ZLIB_GZFILE_SRCS
    ${SOURCE_DIR}/gzlib.c
-    ${SOURCE_DIR}/gzread.c
+    ${CMAKE_CURRENT_BINARY_DIR}/gzread.c
    ${SOURCE_DIR}/gzwrite.c
 )

-set(ZLIB_ALL_SRCS ${ZLIB_SRCS} ${ZLIB_ARCH_SRCS})
+set(ZLIB_ALL_SRCS ${ZLIB_SRCS} ${ZLIB_ARCH_SRCS} ${ZLIB_GZFILE_SRCS})

 add_library(_zlib ${ZLIB_ALL_SRCS})
 add_library(ch_contrib::zlib ALIAS _zlib)

-# https://github.com/zlib-ng/zlib-ng/pull/733
-# This is disabed by default
-add_compile_definitions(Z_TLS=__thread)
-
 if(HAVE_UNISTD_H)
  SET(ZCONF_UNISTD_LINE "#if 1    /* was set to #if 1 by configure/cmake/etc */")
 else()
@ -153,6 +168,9 @@ endif()
 set(ZLIB_PC ${CMAKE_CURRENT_BINARY_DIR}/zlib.pc)
 configure_file(${SOURCE_DIR}/zlib.pc.cmakein ${ZLIB_PC} @ONLY)
 configure_file(${CMAKE_CURRENT_BINARY_DIR}/zconf.h.cmakein ${CMAKE_CURRENT_BINARY_DIR}/zconf.h @ONLY)
+configure_file(${SOURCE_DIR}/zlib.h.in ${CMAKE_CURRENT_BINARY_DIR}/zlib.h @ONLY)
+configure_file(${SOURCE_DIR}/zlib_name_mangling.h.in ${CMAKE_CURRENT_BINARY_DIR}/zlib_name_mangling.h @ONLY)
+configure_file(${SOURCE_DIR}/gzread.c.in ${CMAKE_CURRENT_BINARY_DIR}/gzread.c @ONLY)

 # We should use same defines when including zlib.h as used when zlib compiled
 target_compile_definitions (_zlib PUBLIC ZLIB_COMPAT WITH_GZFILEOP)
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="24.7.2.13"
+ARG VERSION="24.7.3.42"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""

--- a/docker/keeper/entrypoint.sh
+++ b/docker/keeper/entrypoint.sh
@ -40,8 +40,6 @@ fi

 DATA_DIR="${CLICKHOUSE_DATA_DIR:-/var/lib/clickhouse}"
 LOG_DIR="${LOG_DIR:-/var/log/clickhouse-keeper}"
-LOG_PATH="${LOG_DIR}/clickhouse-keeper.log"
-ERROR_LOG_PATH="${LOG_DIR}/clickhouse-keeper.err.log"
 COORDINATION_DIR="${DATA_DIR}/coordination"
 COORDINATION_LOG_DIR="${DATA_DIR}/coordination/log"
 COORDINATION_SNAPSHOT_DIR="${DATA_DIR}/coordination/snapshots"
@ -84,7 +82,7 @@ if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then

    # There is a config file. It is already tested with gosu (if it is readable by the keeper user)
    if [ -f "$KEEPER_CONFIG" ]; then
-        exec $gosu /usr/bin/clickhouse-keeper --config-file="$KEEPER_CONFIG" --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@"
+        exec $gosu /usr/bin/clickhouse-keeper --config-file="$KEEPER_CONFIG" "$@"
    fi

    # There is no config file. Will use embedded one
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="24.7.2.13"
+ARG VERSION="24.7.3.42"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""

--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="24.7.2.13"
+ARG VERSION="24.7.3.42"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

 #docker-official-library:off
--- a/docker/test/base/Dockerfile
+++ b/docker/test/base/Dockerfile
@ -28,12 +28,14 @@ RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_
 RUN echo "UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768'" >> /etc/environment
 RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768'" >> /etc/environment
 RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt max_allocation_size_mb=32768'" >> /etc/environment
+RUN echo "ASAN_OPTIONS='halt_on_error=1 abort_on_error=1'" >> /etc/environment
 # Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
 # (but w/o verbosity for TSAN, otherwise test.reference will not match)
 ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1 max_allocation_size_mb=32768'
 ENV UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768'
 ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768'
 ENV LSAN_OPTIONS='max_allocation_size_mb=32768'
+ENV ASAN_OPTIONS='halt_on_error=1 abort_on_error=1'

 # for external_symbolizer_path, and also ensure that llvm-symbolizer really
 # exists (since you don't want to fallback to addr2line, it is very slow)
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@ -193,53 +193,60 @@ function fuzz

    kill -0 $server_pid

-    # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
-    # and clickhouse-server can do fork-exec, for example, to run some bridge.
-    # Do not set nostop noprint for all signals, because some it may cause gdb to hang,
-    # explicitly ignore non-fatal signals that are used by server.
-    # Number of SIGRTMIN can be determined only in runtime.
-    RTMIN=$(kill -l SIGRTMIN)
-    echo "
-set follow-fork-mode parent
-handle SIGHUP nostop noprint pass
-handle SIGINT nostop noprint pass
-handle SIGQUIT nostop noprint pass
-handle SIGPIPE nostop noprint pass
-handle SIGTERM nostop noprint pass
-handle SIGUSR1 nostop noprint pass
-handle SIGUSR2 nostop noprint pass
-handle SIG$RTMIN nostop noprint pass
-info signals
-continue
-backtrace full
-thread apply all backtrace full
-info registers
-disassemble /s
-up
-disassemble /s
-up
-disassemble /s
-p \"done\"
-detach
-quit
-" > script.gdb
+    IS_ASAN=$(clickhouse-client --query "SELECT count() FROM system.build_options WHERE name = 'CXX_FLAGS' AND position('sanitize=address' IN value)")
+    if [[ "$IS_ASAN" = "1" ]];
+    then
+        echo "ASAN build detected. Not using gdb since it disables LeakSanitizer detections"
+    else
+        # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
+        # and clickhouse-server can do fork-exec, for example, to run some bridge.
+        # Do not set nostop noprint for all signals, because it may cause gdb to hang,
+        # explicitly ignore non-fatal signals that are used by server.
+        # Number of SIGRTMIN can be determined only in runtime.
+        RTMIN=$(kill -l SIGRTMIN)
+        echo "
+    set follow-fork-mode parent
+    handle SIGHUP nostop noprint pass
+    handle SIGINT nostop noprint pass
+    handle SIGQUIT nostop noprint pass
+    handle SIGPIPE nostop noprint pass
+    handle SIGTERM nostop noprint pass
+    handle SIGUSR1 nostop noprint pass
+    handle SIGUSR2 nostop noprint pass
+    handle SIG$RTMIN nostop noprint pass
+    info signals
+    continue
+    backtrace full
+    thread apply all backtrace full
+    info registers
+    disassemble /s
+    up
+    disassemble /s
+    up
+    disassemble /s
+    p \"done\"
+    detach
+    quit
+    " > script.gdb

-    gdb -batch -command script.gdb -p $server_pid &
-    sleep 5
-    # gdb will send SIGSTOP, spend some time loading debug info, and then send SIGCONT, wait for it (up to send_timeout, 300s)
-    time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
+        gdb -batch -command script.gdb -p $server_pid &
+        sleep 5
+        # gdb will send SIGSTOP, spend some time loading debug info, and then send SIGCONT, wait for it (up to send_timeout, 300s)
+        time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
+
+        # Check connectivity after we attach gdb, because it might cause the server
+        # to freeze, and the fuzzer will fail. In debug build, it can take a lot of time.
+        for _ in {1..180}
+        do
+            if clickhouse-client --query "select 1"
+            then
+                break
+            fi
+            sleep 1
+        done
+        kill -0 $server_pid # This checks that it is our server that is started and not some other one
+    fi

-    # Check connectivity after we attach gdb, because it might cause the server
-    # to freeze, and the fuzzer will fail. In debug build, it can take a lot of time.
-    for _ in {1..180}
-    do
-        if clickhouse-client --query "select 1"
-        then
-            break
-        fi
-        sleep 1
-    done
-    kill -0 $server_pid # This checks that it is our server that is started and not some other one
    echo 'Server started and responded.'

    setup_logs_replication
@ -264,8 +271,13 @@ quit
    # The fuzzer_pid belongs to the timeout process.
    actual_fuzzer_pid=$(ps -o pid= --ppid "$fuzzer_pid")

-    echo "Attaching gdb to the fuzzer itself"
-    gdb -batch -command script.gdb -p $actual_fuzzer_pid &
+    if [[ "$IS_ASAN" = "1" ]];
+    then
+        echo "ASAN build detected. Not using gdb since it disables LeakSanitizer detections"
+    else
+        echo "Attaching gdb to the fuzzer itself"
+        gdb -batch -command script.gdb -p $actual_fuzzer_pid &
+    fi

    # Wait for the fuzzer to complete.
    # Note that the 'wait || ...' thing is required so that the script doesn't
--- a/docker/test/integration/runner/requirements.txt
+++ b/docker/test/integration/runner/requirements.txt
@ -74,6 +74,7 @@ protobuf==4.25.2
 psycopg2-binary==2.9.6
 py4j==0.10.9.5
 py==1.11.0
+pyarrow==17.0.0
 pycparser==2.22
 pycryptodome==3.20.0
 pymongo==3.11.0
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@ -232,15 +232,26 @@ function run_tests()

    set +e

+    TEST_ARGS=(
+        -j 2
+        --testname
+        --shard
+        --zookeeper
+        --check-zookeeper-session
+        --no-stateless
+        --hung-check
+        --print-time
+        --capture-client-stacktrace
+        "${ADDITIONAL_OPTIONS[@]}"
+        "$SKIP_TESTS_OPTION"
+    )
    if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then
-        clickhouse-test --client="clickhouse-client --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \
-            --max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \
-            -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
-        "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
-    else
-        clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
-        "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
+        TEST_ARGS+=(
+            --client="clickhouse-client --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 --max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'"
+            --no-parallel-replicas
+        )
    fi
+    clickhouse-test "${TEST_ARGS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
    set -e
 }

--- a/docker/test/stateless/attach_gdb.lib
+++ b/docker/test/stateless/attach_gdb.lib
@ -5,47 +5,53 @@ source /utils.lib

 function attach_gdb_to_clickhouse()
 {
-    # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
-    # and clickhouse-server can do fork-exec, for example, to run some bridge.
-    # Do not set nostop noprint for all signals, because some it may cause gdb to hang,
-    # explicitly ignore non-fatal signals that are used by server.
-    # Number of SIGRTMIN can be determined only in runtime.
-    RTMIN=$(kill -l SIGRTMIN)
-    # shellcheck disable=SC2016
-    echo "
-set follow-fork-mode parent
-handle SIGHUP nostop noprint pass
-handle SIGINT nostop noprint pass
-handle SIGQUIT nostop noprint pass
-handle SIGPIPE nostop noprint pass
-handle SIGTERM nostop noprint pass
-handle SIGUSR1 nostop noprint pass
-handle SIGUSR2 nostop noprint pass
-handle SIG$RTMIN nostop noprint pass
-info signals
-continue
-backtrace full
-info registers
-p "top 1 KiB of the stack:"
-p/x *(uint64_t[128]*)"'$sp'"
-maintenance info sections
-thread apply all backtrace full
-disassemble /s
-up
-disassemble /s
-up
-disassemble /s
-p \"done\"
-detach
-quit
-" > script.gdb
+    # Attach gdb to the running clickhouse-server (pid read from its pid file) so that
+    # a crash produces backtraces in /test_output/gdb.log. Skipped for ASAN builds.
+    # IS_ASAN is "1" when the server's CXX_FLAGS build option mentions sanitize=address.
+    IS_ASAN=$(clickhouse-client --query "SELECT count() FROM system.build_options WHERE name = 'CXX_FLAGS' AND position('sanitize=address' IN value)")
+    if [[ "$IS_ASAN" = "1" ]];
+    then
+        echo "ASAN build detected. Not using gdb since it disables LeakSanitizer detections"
+    else
+            # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
+            # and clickhouse-server can do fork-exec, for example, to run some bridge.
+            # Do not set nostop noprint for all signals, because it may cause gdb to hang,
+            # explicitly ignore non-fatal signals that are used by server.
+            # Number of SIGRTMIN can be determined only in runtime.
+            RTMIN=$(kill -l SIGRTMIN)
+            # The script below is one double-quoted shell string: literal double quotes
+            # meant for gdb must be escaped (\"), and '$sp' is single-quoted on purpose
+            # so the shell does not expand it and gdb sees its own $sp register.
+            # shellcheck disable=SC2016
+            echo "
+        set follow-fork-mode parent
+        handle SIGHUP nostop noprint pass
+        handle SIGINT nostop noprint pass
+        handle SIGQUIT nostop noprint pass
+        handle SIGPIPE nostop noprint pass
+        handle SIGTERM nostop noprint pass
+        handle SIGUSR1 nostop noprint pass
+        handle SIGUSR2 nostop noprint pass
+        handle SIG$RTMIN nostop noprint pass
+        info signals
+        continue
+        backtrace full
+        info registers
+        p \"top 1 KiB of the stack:\"
+        p/x *(uint64_t[128]*)"'$sp'"
+        maintenance info sections
+        thread apply all backtrace full
+        disassemble /s
+        up
+        disassemble /s
+        up
+        disassemble /s
+        p \"done\"
+        detach
+        quit
+        " > script.gdb

-    # FIXME Hung check may work incorrectly because of attached gdb
-    # We cannot attach another gdb to get stacktraces if some queries hung
-    gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log &
-    sleep 5
-    # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
-    run_with_retry 60 clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'"
+            # FIXME Hung check may work incorrectly because of attached gdb
+            # We cannot attach another gdb to get stacktraces if some queries hung
+            gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log &
+            sleep 5
+            # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
+            run_with_retry 60 clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'"
+    fi
 }

 # vi: ft=bash
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@ -174,7 +174,7 @@ do
 done

 setup_logs_replication
-attach_gdb_to_clickhouse || true  # FIXME: to not break old builds, clean on 2023-09-01
+attach_gdb_to_clickhouse

 function fn_exists() {
    declare -F "$1" > /dev/null;
@ -264,11 +264,22 @@ function run_tests()
    TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800))
    START_TIME=${SECONDS}
    set +e
-    timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s \
-        clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
-            --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
-    | ts '%Y-%m-%d %H:%M:%S' \
-    | tee -a test_output/test_result.txt
+
+    TEST_ARGS=(
+        --testname
+        --shard
+        --zookeeper
+        --check-zookeeper-session
+        --hung-check
+        --print-time
+        --no-drop-if-fail
+        --capture-client-stacktrace
+        --test-runs "$NUM_TRIES"
+        "${ADDITIONAL_OPTIONS[@]}"
+    )
+    timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s clickhouse-test "${TEST_ARGS[@]}" 2>&1 \
+        | ts '%Y-%m-%d %H:%M:%S' \
+        | tee -a test_output/test_result.txt
    set -e
    DURATION=$((SECONDS - START_TIME))

--- a/docker/test/stateless/stress_tests.lib
+++ b/docker/test/stateless/stress_tests.lib
@ -308,7 +308,8 @@ function collect_query_and_trace_logs()
 {
    for table in query_log trace_log metric_log
    do
-        clickhouse-local --config-file=/etc/clickhouse-server/config.xml --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
+        # Don't ignore errors here: ignoring them hides sanitizer reports produced when running clickhouse-local
+        clickhouse-local --config-file=/etc/clickhouse-server/config.xml --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst
    done
 }

--- a/docker/test/unit/Dockerfile
+++ b/docker/test/unit/Dockerfile
@ -4,4 +4,5 @@ ARG FROM_TAG=latest
 FROM clickhouse/test-base:$FROM_TAG

 COPY run.sh /
-CMD ["/bin/bash", "/run.sh"]
+RUN chmod +x run.sh
+ENTRYPOINT ["/run.sh"]
--- a/docker/test/unit/run.sh
+++ b/docker/test/unit/run.sh
@ -1,5 +1,27 @@
 #!/bin/bash

 set -x
+# Need to keep error from tests after `tee`. Otherwise we don't alert on asan errors
+set -o pipefail
+set -e

-timeout 40m gdb -q  -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms --gtest_output='json:test_output/test_result.json' | tee test_output/test_result.txt
+if [ "$#" -ne 1 ]; then
+    echo "Expected exactly one argument"
+    exit 1
+fi
+
+if [ "$1" = "GDB" ];
+then
+  timeout 40m \
+    gdb -q  -ex "set print inferior-events off" -ex "set confirm off" -ex "set print thread-events off" -ex run -ex bt -ex quit --args \
+    ./unit_tests_dbms --gtest_output='json:test_output/test_result.json' \
+    | tee test_output/test_result.txt
+elif [ "$1" = "NO_GDB" ];
+then
+  timeout 40m \
+    ./unit_tests_dbms --gtest_output='json:test_output/test_result.json' \
+    | tee test_output/test_result.txt
+else
+    echo "Unknown argument: $1"
+    exit 1
+fi
--- a/docs/changelogs/v24.7.3.42-stable.md
+++ b/docs/changelogs/v24.7.3.42-stable.md
@ -0,0 +1,37 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.7.3.42-stable (63730bc4293) FIXME as compared to v24.7.2.13-stable (6e41f601b2f)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#67969](https://github.com/ClickHouse/ClickHouse/issues/67969): Fixed reading of subcolumns after `ALTER ADD COLUMN` query. [#66243](https://github.com/ClickHouse/ClickHouse/pull/66243) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#67637](https://github.com/ClickHouse/ClickHouse/issues/67637): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
+* Backported in [#67820](https://github.com/ClickHouse/ClickHouse/issues/67820): Fix possible deadlock on query cancel with parallel replicas. [#66905](https://github.com/ClickHouse/ClickHouse/pull/66905) ([Nikita Taranov](https://github.com/nickitat)).
+* Backported in [#67881](https://github.com/ClickHouse/ClickHouse/issues/67881): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#67713](https://github.com/ClickHouse/ClickHouse/issues/67713): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#67995](https://github.com/ClickHouse/ClickHouse/issues/67995): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
+
+* Backported in [#67818](https://github.com/ClickHouse/ClickHouse/issues/67818): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67766](https://github.com/ClickHouse/ClickHouse/issues/67766): Fix crash of `uniq` and `uniqTheta` with `tuple()` argument. Closes [#67303](https://github.com/ClickHouse/ClickHouse/issues/67303). [#67306](https://github.com/ClickHouse/ClickHouse/pull/67306) ([flynn](https://github.com/ucasfl)).
+* Backported in [#67854](https://github.com/ClickHouse/ClickHouse/issues/67854): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
+* Backported in [#67840](https://github.com/ClickHouse/ClickHouse/issues/67840): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Backported in [#67518](https://github.com/ClickHouse/ClickHouse/issues/67518): Split slow test 03036_dynamic_read_subcolumns. [#66954](https://github.com/ClickHouse/ClickHouse/pull/66954) ([Nikita Taranov](https://github.com/nickitat)).
+* Backported in [#67516](https://github.com/ClickHouse/ClickHouse/issues/67516): Split 01508_partition_pruning_long. [#66983](https://github.com/ClickHouse/ClickHouse/pull/66983) ([Nikita Taranov](https://github.com/nickitat)).
+* Backported in [#67529](https://github.com/ClickHouse/ClickHouse/issues/67529): Reduce max time of 00763_long_lock_buffer_alter_destination_table. [#67185](https://github.com/ClickHouse/ClickHouse/pull/67185) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#67643](https://github.com/ClickHouse/ClickHouse/issues/67643): [Green CI] Fix potentially flaky test_mask_sensitive_info integration test. [#67506](https://github.com/ClickHouse/ClickHouse/pull/67506) ([Alexey Katsman](https://github.com/alexkats)).
+* Backported in [#67609](https://github.com/ClickHouse/ClickHouse/issues/67609): Fix test_zookeeper_config_load_balancing after adding the xdist worker name to the instance. [#67590](https://github.com/ClickHouse/ClickHouse/pull/67590) ([Pablo Marcos](https://github.com/pamarcos)).
+* Backported in [#67871](https://github.com/ClickHouse/ClickHouse/issues/67871): Fix 02434_cancel_insert_when_client_dies. [#67600](https://github.com/ClickHouse/ClickHouse/pull/67600) ([vdimir](https://github.com/vdimir)).
+* Backported in [#67704](https://github.com/ClickHouse/ClickHouse/issues/67704): Fix 02910_bad_logs_level_in_local in fast tests. [#67603](https://github.com/ClickHouse/ClickHouse/pull/67603) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#67689](https://github.com/ClickHouse/ClickHouse/issues/67689): Fix 01605_adaptive_granularity_block_borders. [#67605](https://github.com/ClickHouse/ClickHouse/pull/67605) ([Nikita Taranov](https://github.com/nickitat)).
+* Backported in [#67827](https://github.com/ClickHouse/ClickHouse/issues/67827): Try fix 03143_asof_join_ddb_long. [#67620](https://github.com/ClickHouse/ClickHouse/pull/67620) ([Nikita Taranov](https://github.com/nickitat)).
+* Backported in [#67892](https://github.com/ClickHouse/ClickHouse/issues/67892): Revert "Merge pull request [#66510](https://github.com/ClickHouse/ClickHouse/issues/66510) from canhld94/fix_trivial_count_non_deterministic_func". [#67800](https://github.com/ClickHouse/ClickHouse/pull/67800) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+
--- a/docs/en/development/contrib.md
+++ b/docs/en/development/contrib.md
@ -27,23 +27,23 @@ Avoid dumping copies of external code into the library directory.
 Instead create a Git submodule to pull third-party code from an external upstream repository.

 All submodules used by ClickHouse are listed in the `.gitmodule` file.
-If the library can be used as-is (the default case), you can reference the upstream repository directly.
-If the library needs patching, create a fork of the upstream repository in the [ClickHouse organization on GitHub](https://github.com/ClickHouse).
+- If the library can be used as-is (the default case), you can reference the upstream repository directly.
+- If the library needs patching, create a fork of the upstream repository in the [ClickHouse organization on GitHub](https://github.com/ClickHouse).

 In the latter case, we aim to isolate custom patches as much as possible from upstream commits.
-To that end, create a branch with prefix `clickhouse/` from the branch or tag you want to integrate, e.g. `clickhouse/master` (for branch `master`) or `clickhouse/release/vX.Y.Z` (for tag `release/vX.Y.Z`).
-This ensures that pulls from the upstream repository into the fork will leave custom `clickhouse/` branches unaffected.
-Submodules in `contrib/` must only track `clickhouse/` branches of forked third-party repositories.
+To that end, create a branch with prefix `ClickHouse/` from the branch or tag you want to integrate, e.g. `ClickHouse/2024_2` (for branch `2024_2`) or `ClickHouse/release/vX.Y.Z` (for tag `release/vX.Y.Z`).
+Avoid following upstream development branches `master` / `main` / `dev` (i.e., prefix branches `ClickHouse/master` / `ClickHouse/main` / `ClickHouse/dev` in the fork repository).
+Such branches are moving targets which make proper versioning harder.
+"Prefix branches" ensure that pulls from the upstream repository into the fork will leave custom `ClickHouse/` branches unaffected.
+Submodules in `contrib/` must only track `ClickHouse/` branches of forked third-party repositories.

-Patches are only applied against `clickhouse/` branches of external libraries.
-For that, push the patch as a branch with `clickhouse/`, e.g. `clickhouse/fix-some-desaster`.
-Then create a PR from the new branch against the custom tracking branch with `clickhouse/` prefix, (e.g. `clickhouse/master` or `clickhouse/release/vX.Y.Z`) and merge the patch.
+Patches are only applied against `ClickHouse/` branches of external libraries.
+
+There are two ways to do that:
+- you would like to make a new fix against a `ClickHouse/`-prefix branch in the forked repository, e.g. a sanitizer fix. In that case, push the fix as a branch with `ClickHouse/` prefix, e.g. `ClickHouse/fix-sanitizer-disaster`. Then create a PR from the new branch against the custom tracking branch, e.g. `ClickHouse/2024_2 <-- ClickHouse/fix-sanitizer-disaster` and merge the PR.
+- you update the submodule and need to re-apply earlier patches. In this case, re-creating old PRs is overkill. Instead, simply cherry-pick older commits into the new `ClickHouse/` branch (corresponding to the new version). Feel free to squash commits of PRs that had multiple commits. In the best case, we did contribute custom patches back to upstream and can omit patches in the new version.
+
+Once the submodule has been updated, bump the submodule in ClickHouse to point to the new hash in the fork.

 Create patches of third-party libraries with the official repository in mind and consider contributing the patch back to the upstream repository.
 This makes sure that others will also benefit from the patch and it will not be a maintenance burden for the ClickHouse team.
-
-To pull upstream changes into the submodule, you can use two methods:
- (less work but less clean): merge upstream `master` into the corresponding `clickhouse/` tracking branch in the forked repository. You will need to resolve merge conflicts with previous custom patches. This method can be used when the `clickhouse/` branch tracks an upstream development branch like `master`, `main`, `dev`, etc.
- (more work but cleaner): create a new branch with `clickhouse/` prefix from the upstream commit or tag you like to integrate. Then re-apply all existing patches using new PRs (or squash them into a single PR). This method can be used when the `clickhouse/` branch tracks a specific upstream version branch or tag. It is cleaner in the sense that custom patches and upstream changes are better isolated from each other.
-
-Once the submodule has been updated, bump the submodule in ClickHouse to point to the new hash in the fork.
--- a/docs/en/engines/table-engines/index.md
+++ b/docs/en/engines/table-engines/index.md
@ -61,6 +61,7 @@ Engines in the family:
 - [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md)
 - [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)
 - [S3Queue](../../engines/table-engines/integrations/s3queue.md)
+- [TimeSeries](../../engines/table-engines/integrations/time-series.md)

 ### Special Engines {#special-engines}

--- a/docs/en/engines/table-engines/integrations/kafka.md
+++ b/docs/en/engines/table-engines/integrations/kafka.md
@ -251,6 +251,44 @@ The number of rows in one Kafka message depends on whether the format is row-bas
 - For row-based formats the number of rows in one Kafka message can be controlled by setting `kafka_max_rows_per_message`.
 - For block-based formats we cannot divide block into smaller parts, but the number of rows in one block can be controlled by general setting [max_block_size](../../../operations/settings/settings.md#setting-max_block_size).

+## Experimental engine to store committed offsets in ClickHouse Keeper
+
+If `allow_experimental_kafka_offsets_storage_in_keeper` is enabled, then two more settings can be specified to the Kafka table engine:
+ - `kafka_keeper_path` specifies the path to the table in ClickHouse Keeper
+ - `kafka_replica_name` specifies the replica name in ClickHouse Keeper
+
+Either both of the settings must be specified or neither of them. When both of them are specified, then a new, experimental Kafka engine will be used. The new engine doesn't depend on storing the committed offsets in Kafka, but stores them in ClickHouse Keeper. It still tries to commit the offsets to Kafka, but it only depends on those offsets when the table is created. In any other circumstances (table is restarted, or recovered after some error) the offsets stored in ClickHouse Keeper will be used as an offset to continue consuming messages from. Apart from the committed offset, it also stores how many messages were consumed in the last batch, so if the insert fails, the same amount of messages will be consumed, thus enabling deduplication if necessary.
+
+Example:
+
+``` sql
+CREATE TABLE experimental_kafka (key UInt64, value UInt64)
+ENGINE = Kafka('localhost:19092', 'my-topic', 'my-consumer', 'JSONEachRow')
+SETTINGS
+  kafka_keeper_path = '/clickhouse/{database}/experimental_kafka',
+  kafka_replica_name = 'r1'
+SETTINGS allow_experimental_kafka_offsets_storage_in_keeper=1;
+```
+
+Or to utilize the `uuid` and `replica` macros similarly to ReplicatedMergeTree:
+
+``` sql
+CREATE TABLE experimental_kafka (key UInt64, value UInt64)
+ENGINE = Kafka('localhost:19092', 'my-topic', 'my-consumer', 'JSONEachRow')
+SETTINGS
+  kafka_keeper_path = '/clickhouse/{database}/{uuid}',
+  kafka_replica_name = '{replica}'
+SETTINGS allow_experimental_kafka_offsets_storage_in_keeper=1;
+```
+
+### Known limitations
+
+As the new engine is experimental, it is not production ready yet. There are a few known limitations of the implementation:
+ - The biggest limitation is the engine doesn't support direct reading. Reading from the engine using materialized views and writing to the engine work, but direct reading doesn't. As a result, all direct `SELECT` queries will fail.
+ - Rapidly dropping and recreating the table or specifying the same ClickHouse Keeper path to different engines might cause issues. As best practice you can use the `{uuid}` in `kafka_keeper_path` to avoid clashing paths.
+ - To make repeatable reads, messages cannot be consumed from multiple partitions on a single thread. On the other hand, the Kafka consumers have to be polled regularly to keep them alive. As a result of these two objectives, we decided to only allow creating multiple consumers if `kafka_thread_per_consumer` is enabled, otherwise it is too complicated to avoid issues regarding polling consumers regularly.
+ - Consumers created by the new storage engine do not show up in [`system.kafka_consumers`](../../../operations/system-tables/kafka_consumers.md) table.
+
 **See Also**

 - [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
--- a/docs/en/engines/table-engines/integrations/s3.md
+++ b/docs/en/engines/table-engines/integrations/s3.md
@ -146,6 +146,7 @@ Code: 48. DB::Exception: Received from localhost:9000. DB::Exception: Reading fr
 - `_file` — Name of the file. Type: `LowCardinalty(String)`.
 - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
 - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
+- `_etag` — ETag of the file. Type: `LowCardinality(String)`. If the etag is unknown, the value is `NULL`.

 For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).

--- a/docs/en/engines/table-engines/integrations/time-series.md
+++ b/docs/en/engines/table-engines/integrations/time-series.md
@ -0,0 +1,295 @@
+---
+slug: /en/engines/table-engines/special/time_series
+sidebar_position: 60
+sidebar_label: TimeSeries
+---
+
+# TimeSeries Engine [Experimental]
+
+A table engine storing time series, i.e. a set of values associated with timestamps and tags (or labels):
+
+```
+metric_name1[tag1=value1, tag2=value2, ...] = {timestamp1: value1, timestamp2: value2, ...}
+metric_name2[...] = ...
+```
+
+:::info
+This is an experimental feature that may change in backwards-incompatible ways in future releases.
+Enable usage of the TimeSeries table engine
+with [allow_experimental_time_series_table](../../../operations/settings/settings.md#allow-experimental-time-series-table) setting.
+Input the command `set allow_experimental_time_series_table = 1`.
+:::
+
+## Syntax {#syntax}
+
+``` sql
+CREATE TABLE name [(columns)] ENGINE=TimeSeries
+[SETTINGS var1=value1, ...]
+[DATA db.data_table_name | DATA ENGINE data_table_engine(arguments)]
+[TAGS db.tags_table_name | TAGS ENGINE tags_table_engine(arguments)]
+[METRICS db.metrics_table_name | METRICS ENGINE metrics_table_engine(arguments)]
+```
+
+## Usage {#usage}
+
+It's easier to start with everything set by default (it's allowed to create a `TimeSeries` table without specifying a list of columns):
+
+``` sql
+CREATE TABLE my_table ENGINE=TimeSeries
+```
+
+Then this table can be used with the following protocols (a port must be assigned in the server configuration):
+- [prometheus remote-write](../../../interfaces/prometheus.md#remote-write)
+- [prometheus remote-read](../../../interfaces/prometheus.md#remote-read)
+
+## Target tables {#target-tables}
+
+A `TimeSeries` table doesn't have its own data, everything is stored in its target tables.
+This is similar to how a [materialized view](../../../sql-reference/statements/create/view#materialized-view) works,
+with the difference that a materialized view has one target table
+whereas a `TimeSeries` table has three target tables named [data](#data-table), [tags](#tags-table), and [metrics](#metrics-table).
+
+The target tables can be either specified explicitly in the `CREATE TABLE` query
+or the `TimeSeries` table engine can generate inner target tables automatically.
+
+The target tables are the following:
+1. The _data_ table {#data-table} contains time series associated with some identifier.
+The _data_ table must have columns:
+
+| Name | Mandatory? | Default type | Possible types | Description |
+|---|---|---|---|---|
+| `id` | [x] | `UUID` | any | Identifies a combination of a metric name and tags |
+| `timestamp` | [x] | `DateTime64(3)` | `DateTime64(X)` | A time point |
+| `value` | [x] | `Float64` | `Float32` or `Float64` | A value associated with the `timestamp` |
+
+2. The _tags_ table {#tags-table} contains identifiers calculated for each combination of a metric name and tags.
+The _tags_ table must have columns:
+
+| Name | Mandatory? | Default type | Possible types | Description |
+|---|---|---|---|---|
+| `id` | [x] | `UUID` | any (must match the type of `id` in the [data](#data-table) table) | An `id` identifies a combination of a metric name and tags. The DEFAULT expression specifies how to calculate such an identifier |
+| `metric_name` | [x] | `LowCardinality(String)` | `String` or `LowCardinality(String)` | The name of a metric |
+| `<tag_value_column>` | [ ] | `String` | `String` or `LowCardinality(String)` or `LowCardinality(Nullable(String))` | The value of a specific tag, the tag's name and the name of a corresponding column are specified in the [tags_to_columns](#settings) setting |
+| `tags` | [x] | `Map(LowCardinality(String), String)` | `Map(String, String)` or `Map(LowCardinality(String), String)` or `Map(LowCardinality(String), LowCardinality(String))` | Map of tags excluding the tag `__name__` containing the name of a metric and excluding tags with names enumerated in the [tags_to_columns](#settings) setting |
+| `all_tags` | [ ] | `Map(String, String)` | `Map(String, String)` or `Map(LowCardinality(String), String)` or `Map(LowCardinality(String), LowCardinality(String))` | Ephemeral column, each row is a map of all the tags excluding only the tag `__name__` containing the name of a metric. The only purpose of that column is to be used while calculating `id` |
+| `min_time` | [ ] | `Nullable(DateTime64(3))` | `DateTime64(X)` or `Nullable(DateTime64(X))` | Minimum timestamp of time series with that `id`. The column is created if [store_min_time_and_max_time](#settings) is `true` |
+| `max_time` | [ ] | `Nullable(DateTime64(3))` | `DateTime64(X)` or `Nullable(DateTime64(X))` | Maximum timestamp of time series with that `id`. The column is created if [store_min_time_and_max_time](#settings) is `true` |
+
+3. The _metrics_ table {#metrics-table} contains some information about metrics being collected, the types of those metrics and their descriptions.
+The _metrics_ table must have columns:
+
+| Name | Mandatory? | Default type | Possible types | Description |
+|---|---|---|---|---|
+| `metric_family_name` | [x] | `String` | `String` or `LowCardinality(String)` | The name of a metric family |
+| `type` | [x] | `String` | `String` or `LowCardinality(String)` | The type of a metric family, one of "counter", "gauge", "summary", "stateset", "histogram", "gaugehistogram" |
+| `unit` | [x] | `String` | `String` or `LowCardinality(String)` | The unit used in a metric |
+| `help` | [x] | `String` | `String` or `LowCardinality(String)` | The description of a metric |
+
+Any row inserted into a `TimeSeries` table will be in fact stored in those three target tables.
+A `TimeSeries` table contains all those columns from the [data](#data-table), [tags](#tags-table), [metrics](#metrics-table) tables.
+
+## Creation {#creation}
+
+There are multiple ways to create a table with the `TimeSeries` table engine.
+The simplest statement
+
+``` sql
+CREATE TABLE my_table ENGINE=TimeSeries
+```
+
+will actually create the following table (you can see that by executing `SHOW CREATE TABLE my_table`):
+
+``` sql
+CREATE TABLE my_table
+(
+    `id` UUID DEFAULT reinterpretAsUUID(sipHash128(metric_name, all_tags)),
+    `timestamp` DateTime64(3),
+    `value` Float64,
+    `metric_name` LowCardinality(String),
+    `tags` Map(LowCardinality(String), String),
+    `all_tags` Map(String, String),
+    `min_time` Nullable(DateTime64(3)),
+    `max_time` Nullable(DateTime64(3)),
+    `metric_family_name` String,
+    `type` String,
+    `unit` String,
+    `help` String
+)
+ENGINE = TimeSeries
+DATA ENGINE = MergeTree ORDER BY (id, timestamp)
+DATA INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
+TAGS ENGINE = AggregatingMergeTree PRIMARY KEY metric_name ORDER BY (metric_name, id)
+TAGS INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
+METRICS ENGINE = ReplacingMergeTree ORDER BY metric_family_name
+METRICS INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
+```
+
+So the columns were generated automatically and also there are three inner UUIDs in this statement -
+one per each inner target table that was created.
+(Inner UUIDs are not shown normally until setting
+[show_table_uuid_in_table_create_query_if_not_nil](../../../operations/settings/settings#show_table_uuid_in_table_create_query_if_not_nil)
+is set.)
+
+Inner target tables have names like `.inner_id.data.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`,
+`.inner_id.tags.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, `.inner_id.metrics.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
+and each target table has columns which are a subset of the columns of the main `TimeSeries` table:
+
+``` sql
+CREATE TABLE default.`.inner_id.data.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
+(
+    `id` UUID,
+    `timestamp` DateTime64(3),
+    `value` Float64
+)
+ENGINE = MergeTree
+ORDER BY (id, timestamp)
+```
+
+``` sql
+CREATE TABLE default.`.inner_id.tags.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
+(
+    `id` UUID DEFAULT reinterpretAsUUID(sipHash128(metric_name, all_tags)),
+    `metric_name` LowCardinality(String),
+    `tags` Map(LowCardinality(String), String),
+    `all_tags` Map(String, String) EPHEMERAL,
+    `min_time` SimpleAggregateFunction(min, Nullable(DateTime64(3))),
+    `max_time` SimpleAggregateFunction(max, Nullable(DateTime64(3)))
+)
+ENGINE = AggregatingMergeTree
+PRIMARY KEY metric_name
+ORDER BY (metric_name, id)
+```
+
+``` sql
+CREATE TABLE default.`.inner_id.metrics.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
+(
+    `metric_family_name` String,
+    `type` String,
+    `unit` String,
+    `help` String
+)
+ENGINE = ReplacingMergeTree
+ORDER BY metric_family_name
+```
+
+## Adjusting types of columns {#adjusting-column-types}
+
+You can adjust the types of almost any column of the inner target tables by specifying them explicitly
+while defining the main table. For example,
+
+``` sql
+CREATE TABLE my_table
+(
+    timestamp DateTime64(6)
+) ENGINE=TimeSeries
+```
+
+will make the inner [data](#data-table) table store timestamp in microseconds instead of milliseconds:
+
+``` sql
+CREATE TABLE default.`.inner_id.data.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
+(
+    `id` UUID,
+    `timestamp` DateTime64(6),
+    `value` Float64
+)
+ENGINE = MergeTree
+ORDER BY (id, timestamp)
+```
+
+## The `id` column {#id-column}
+
+The `id` column contains identifiers, every identifier is calculated for a combination of a metric name and tags.
+The DEFAULT expression for the `id` column is an expression which will be used to calculate such identifiers.
+Both the type of the `id` column and that expression can be adjusted by specifying them explicitly:
+
+``` sql
+CREATE TABLE my_table
+(
+    id UInt64 DEFAULT sipHash64(metric_name, all_tags)
+) ENGINE=TimeSeries
+```
+
+## The `tags` and `all_tags` columns {#tags-and-all-tags}
+
+There are two columns containing maps of tags - `tags` and `all_tags`. In this example they mean the same, however they can be different
+if the setting `tags_to_columns` is used. This setting allows specifying that a specific tag should be stored in a separate column instead of being stored
+in a map inside the `tags` column:
+
+``` sql
+CREATE TABLE my_table ENGINE=TimeSeries SETTINGS tags_to_columns = {'instance': 'instance', 'job': 'job'}
+```
+
+This statement will add columns
+```
+    `instance` String,
+    `job` String
+```
+to the definition of both `my_table` and its inner [tags](#tags-table) target table. In this case the `tags` column will not contain tags `instance` and `job`,
+but the `all_tags` column will contain them. The `all_tags` column is ephemeral and its only purpose is to be used in the DEFAULT expression
+for the `id` column.
+
+The types of columns can be adjusted by specifying them explicitly:
+
+``` sql
+CREATE TABLE my_table (instance LowCardinality(String), job LowCardinality(Nullable(String)))
+ENGINE=TimeSeries SETTINGS tags_to_columns = {'instance': 'instance', 'job': 'job'}
+```
+
+## Table engines of inner target tables {#inner-table-engines}
+
+By default inner target tables use the following table engines:
+- the [data](#data-table) table uses [MergeTree](../mergetree-family/mergetree);
+- the [tags](#tags-table) table uses [AggregatingMergeTree](../mergetree-family/aggregatingmergetree) because the same data is often inserted multiple times to this table so we need a way
+to remove duplicates, and also because it's required to do aggregation for columns `min_time` and `max_time`;
+- the [metrics](#metrics-table) table uses [ReplacingMergeTree](../mergetree-family/replacingmergetree) because the same data is often inserted multiple times to this table so we need a way
+to remove duplicates.
+
+Other table engines also can be used for inner target tables if it's specified so:
+
+``` sql
+CREATE TABLE my_table ENGINE=TimeSeries
+DATA ENGINE=ReplicatedMergeTree
+TAGS ENGINE=ReplicatedAggregatingMergeTree
+METRICS ENGINE=ReplicatedReplacingMergeTree
+```
+
+## External target tables {#external-target-tables}
+
+It's possible to make a `TimeSeries` table use a manually created table:
+
+``` sql
+CREATE TABLE data_for_my_table
+(
+    `id` UUID,
+    `timestamp` DateTime64(3),
+    `value` Float64
+)
+ENGINE = MergeTree
+ORDER BY (id, timestamp);
+
+CREATE TABLE tags_for_my_table ...
+
+CREATE TABLE metrics_for_my_table ...
+
+CREATE TABLE my_table ENGINE=TimeSeries DATA data_for_my_table TAGS tags_for_my_table METRICS metrics_for_my_table;
+```
+
+## Settings {#settings}
+
+Here is a list of settings which can be specified while defining a `TimeSeries` table:
+
+| Name | Type | Default | Description |
+|---|---|---|---|
+| `tags_to_columns` | Map | {} | Map specifying which tags should be put to separate columns in the [tags](#tags-table) table. Syntax: `{'tag1': 'column1', 'tag2': 'column2', ...}` |
+| `use_all_tags_column_to_generate_id` | Bool | true | When generating an expression to calculate an identifier of a time series, this flag enables using the `all_tags` column in that calculation |
+| `store_min_time_and_max_time` | Bool | true | If set to true then the table will store `min_time` and `max_time` for each time series |
+| `aggregate_min_time_and_max_time` | Bool | true | When creating an inner target `tags` table, this flag enables using `SimpleAggregateFunction(min, Nullable(DateTime64(3)))` instead of just `Nullable(DateTime64(3))` as the type of the `min_time` column, and the same for the `max_time` column |
+| `filter_by_min_time_and_max_time` | Bool | true | If set to true then the table will use the `min_time` and `max_time` columns for filtering time series |
+
+## Functions {#functions}
+
+Here is a list of functions supporting a `TimeSeries` table as an argument:
+- [timeSeriesData](../../../sql-reference/table-functions/timeSeriesData.md)
+- [timeSeriesTags](../../../sql-reference/table-functions/timeSeriesTags.md)
+- [timeSeriesMetrics](../../../sql-reference/table-functions/timeSeriesMetrics.md)
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@ -1005,7 +1005,7 @@ They can be used for prewhere optimization only if we enable `set allow_statisti

 ## Column-level Settings {#column-level-settings}

-Certain MergeTree settings can be override at column level:
+Certain MergeTree settings can be overridden at column level:

 - `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table.
 - `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark.
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@ -379,7 +379,7 @@ You can mitigate this problem by enabling `wait_end_of_query=1` ([Response Buffe
 However, this does not completely solve the problem because the result must still fit within the `http_response_buffer_size`, and other settings like `send_progress_in_http_headers` can interfere with the delay of the header.
 The only way to catch all errors is to analyze the HTTP body before parsing it using the required format.

-### Queries with Parameters {#cli-queries-with-parameters}
+## Queries with Parameters {#cli-queries-with-parameters}

 You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](../interfaces/cli.md#cli-queries-with-parameters).

--- a/docs/en/interfaces/prometheus.md
+++ b/docs/en/interfaces/prometheus.md
@ -0,0 +1,160 @@
+---
+slug: /en/interfaces/prometheus
+sidebar_position: 19
+sidebar_label: Prometheus protocols
+---
+
+# Prometheus protocols
+
+## Exposing metrics {#expose}
+
+:::note
+ClickHouse Cloud does not currently support connecting to Prometheus. To be notified when this feature is supported, please contact support@clickhouse.com.
+:::
+
+ClickHouse can expose its own metrics for scraping from Prometheus:
+
+```xml
+<prometheus>
+    <port>9363</port>
+    <endpoint>/metrics</endpoint>
+    <metrics>true</metrics>
+    <asynchronous_metrics>true</asynchronous_metrics>
+    <events>true</events>
+    <errors>true</errors>
+</prometheus>
+```
+Section `<prometheus.handlers>` can be used to make more extended handlers.
+This section is similar to [<http_handlers>](/en/interfaces/http) but works for Prometheus protocols:
+
+```xml
+<prometheus>
+    <port>9363</port>
+    <handlers>
+        <my_rule_1>
+            <url>/metrics</url>
+            <handler>
+                <type>expose_metrics</type>
+                <metrics>true</metrics>
+                <asynchronous_metrics>true</asynchronous_metrics>
+                <events>true</events>
+                <errors>true</errors>
+            </handler>
+        </my_rule_1>
+    </handlers>
+</prometheus>
+```
+
+Settings:
+
+| Name | Default | Description |
+|---|---|---|
+| `port` | none | Port for serving the exposing metrics protocol. |
+| `endpoint` | `/metrics` | HTTP endpoint for scraping metrics by the Prometheus server. Starts with `/`. Should not be used with the `<handlers>` section. |
+| `url` / `headers` / `method` | none | Filters used to find a matching handler for a request. Similar to the fields with the same names in the [<http_handlers>](/en/interfaces/http) section. |
+| `metrics` | true | Expose metrics from the [system.metrics](/en/operations/system-tables/metrics) table. |
+| `asynchronous_metrics` | true | Expose current metrics values from the [system.asynchronous_metrics](/en/operations/system-tables/asynchronous_metrics) table. |
+| `events` | true | Expose metrics from the [system.events](/en/operations/system-tables/events) table. |
+| `errors` | true | Expose the number of errors, by error code, that have occurred since the last server restart. This information can also be obtained from the [system.errors](/en/operations/system-tables/errors) table. |
+
+Check (replace `127.0.0.1` with the IP addr or hostname of your ClickHouse server):
+```bash
+curl 127.0.0.1:9363/metrics
+```
+
+## Remote-write protocol {#remote-write}
+
+ClickHouse supports the [remote-write](https://prometheus.io/docs/specs/remote_write_spec/) protocol.
+Data are received by this protocol and written to a [TimeSeries](/en/engines/table-engines/special/time_series) table
+(which should be created beforehand).
+
+```xml
+<prometheus>
+    <port>9363</port>
+    <handlers>
+        <my_rule_1>
+            <url>/write</url>
+            <handler>
+                <type>remote_write</type>
+                <database>db_name</database>
+                <table>time_series_table</table>
+            </handler>
+        </my_rule_1>
+    </handlers>
+</prometheus>
+```
+
+Settings:
+
+| Name | Default | Description |
+|---|---|---|
+| `port` | none | Port for serving the `remote-write` protocol. |
+| `url` / `headers` / `method` | none | Filters used to find a matching handler for a request. Similar to the fields with the same names in the [<http_handlers>](/en/interfaces/http) section. |
+| `table` | none | The name of a [TimeSeries](/en/engines/table-engines/special/time_series) table to write data received by the `remote-write` protocol. This name can optionally contain the name of a database too. |
+| `database` | none | The name of a database where the table specified in the `table` setting is located if it's not specified in the `table` setting. |
+
+## Remote-read protocol {#remote-read}
+
+ClickHouse supports the [remote-read](https://prometheus.io/docs/prometheus/latest/querying/remote_read_api/) protocol.
+Data are read from a [TimeSeries](/en/engines/table-engines/special/time_series) table and sent via this protocol.
+
+```xml
+<prometheus>
+    <port>9363</port>
+    <handlers>
+        <my_rule_1>
+            <url>/read</url>
+            <handler>
+                <type>remote_read</type>
+                <database>db_name</database>
+                <table>time_series_table</table>
+            </handler>
+        </my_rule_1>
+    </handlers>
+</prometheus>
+```
+
+Settings:
+
+| Name | Default | Description |
+|---|---|---|
+| `port` | none | Port for serving the `remote-read` protocol. |
+| `url` / `headers` / `method` | none | Filters used to find a matching handler for a request. Similar to the fields with the same names in the [<http_handlers>](/en/interfaces/http) section. |
+| `table` | none | The name of a [TimeSeries](/en/engines/table-engines/special/time_series) table to read data to send by the `remote-read` protocol. This name can optionally contain the name of a database too. |
+| `database` | none | The name of a database where the table specified in the `table` setting is located if it's not specified in the `table` setting. |
+
+## Configuration for multiple protocols {#multiple-protocols}
+
+Multiple protocols can be specified together in one place:
+
+```xml
+<prometheus>
+    <port>9363</port>
+    <handlers>
+        <my_rule_1>
+            <url>/metrics</url>
+            <handler>
+                <type>expose_metrics</type>
+                <metrics>true</metrics>
+                <asynchronous_metrics>true</asynchronous_metrics>
+                <events>true</events>
+                <errors>true</errors>
+            </handler>
+        </my_rule_1>
+        <my_rule_2>
+            <url>/write</url>
+            <handler>
+                <type>remote_write</type>
+                <table>db_name.time_series_table</table>
+            </handler>
+        </my_rule_2>
+        <my_rule_3>
+            <url>/read</url>
+            <handler>
+                <type>remote_read</type>
+                <table>db_name.time_series_table</table>
+            </handler>
+        </my_rule_3>
+    </handlers>
+</prometheus>
+```
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@ -2112,48 +2112,6 @@ The trailing slash is mandatory.
 <path>/var/lib/clickhouse/</path>
 ```

-## Prometheus {#prometheus}
-
-:::note
-ClickHouse Cloud does not currently support connecting to Prometheus. To be notified when this feature is supported, please contact support@clickhouse.com.
-:::
-
-Exposing metrics data for scraping from [Prometheus](https://prometheus.io).
-
-Settings:
-
- `endpoint` – HTTP endpoint for scraping metrics by prometheus server. Start from ‘/’.
- `port` – Port for `endpoint`.
- `metrics` – Expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
- `events` – Expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- `asynchronous_metrics` – Expose current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
- `errors` - Expose the number of errors by error codes occurred since the last server restart. This information could be obtained from the [system.errors](../../operations/system-tables/asynchronous_metrics.md#system_tables-errors) as well.
-
-**Example**
-
-``` xml
-<clickhouse>
-    <listen_host>0.0.0.0</listen_host>
-    <http_port>8123</http_port>
-    <tcp_port>9000</tcp_port>
-    <!-- highlight-start -->
-    <prometheus>
-        <endpoint>/metrics</endpoint>
-        <port>9363</port>
-        <metrics>true</metrics>
-        <events>true</events>
-        <asynchronous_metrics>true</asynchronous_metrics>
-        <errors>true</errors>
-    </prometheus>
-    <!-- highlight-end -->
-</clickhouse>
-```
-
-Check (replace `127.0.0.1` with the IP addr or hostname of your ClickHouse server):
-```bash
-curl 127.0.0.1:9363/metrics
-```
-
 ## query_log {#query-log}

 Setting for logging queries received with the [log_queries=1](../../operations/settings/settings.md) setting.
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -4629,8 +4629,8 @@ Default Value: 5.

 ## memory_overcommit_ratio_denominator {#memory_overcommit_ratio_denominator}

-It represents soft memory limit in case when hard limit is reached on user level.
-This value is used to compute overcommit ratio for the query.
+It represents the soft memory limit when the hard limit is reached on the global level.
+This value is used to compute the overcommit ratio for the query.
 Zero means skip the query.
 Read more about [memory overcommit](memory-overcommit.md).

@ -4646,8 +4646,8 @@ Default value: `5000000`.

 ## memory_overcommit_ratio_denominator_for_user {#memory_overcommit_ratio_denominator_for_user}

-It represents soft memory limit in case when hard limit is reached on global level.
-This value is used to compute overcommit ratio for the query.
+It represents the soft memory limit when the hard limit is reached on the user level.
+This value is used to compute the overcommit ratio for the query.
 Zero means skip the query.
 Read more about [memory overcommit](memory-overcommit.md).

@ -5609,8 +5609,31 @@ Minimal size of block to compress in CROSS JOIN. Zero value means - disable this

 Default value: `1GiB`.

+## restore_replace_external_engines_to_null
+
+For testing purposes. Replaces all external engines with Null so that no external connections are initiated.
+
+Default value: `False`
+
+## restore_replace_external_table_functions_to_null
+
+For testing purposes. Replaces all external table functions with Null so that no external connections are initiated.
+
+Default value: `False`
+
 ## disable_insertion_and_mutation

 Disable all insert and mutations (alter table update / alter table delete / alter table drop partition). Set to true, can make this node focus on reading queries.

 Default value: `false`.
+
+## allow_experimental_time_series_table {#allow-experimental-time-series-table}
+
+Allows creation of tables with the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine.
+
+Possible values:
+
+- 0 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is disabled.
+- 1 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is enabled.
+
+Default value: `0`.
--- a/docs/en/operations/system-tables/trace_log.md
+++ b/docs/en/operations/system-tables/trace_log.md
@ -3,7 +3,7 @@ slug: /en/operations/system-tables/trace_log
 ---
 # trace_log

-Contains stack traces collected by the sampling query profiler.
+Contains stack traces collected by the [sampling query profiler](../../operations/optimizing-performance/sampling-query-profiler.md).

 ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also see settings: [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns), [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns), [memory_profiler_step](../../operations/settings/settings.md#memory_profiler_step),
 [memory_profiler_sample_probability](../../operations/settings/settings.md#memory_profiler_sample_probability), [trace_profile_events](../../operations/settings/settings.md#trace_profile_events).
--- a/docs/en/operations/utilities/clickhouse-keeper-client.md
+++ b/docs/en/operations/utilities/clickhouse-keeper-client.md
@ -28,39 +28,39 @@ A client application to interact with clickhouse-keeper by its native protocol.
 Connected to ZooKeeper at [::1]:9181 with session_id 137
 / :) ls
 keeper foo bar
-/ :) cd keeper
+/ :) cd 'keeper'
 /keeper :) ls
 api_version
-/keeper :) cd api_version
+/keeper :) cd 'api_version'
 /keeper/api_version :) ls

-/keeper/api_version :) cd xyz
+/keeper/api_version :) cd 'xyz'
 Path /keeper/api_version/xyz does not exist
 /keeper/api_version :) cd ../../
 / :) ls
 keeper foo bar
-/ :) get keeper/api_version
+/ :) get 'keeper/api_version'
 2
 ```

 ## Commands {#clickhouse-keeper-client-commands}

-   `ls [path]` -- Lists the nodes for the given path (default: cwd)
-   `cd [path]` -- Changes the working path (default `.`)
-   `exists <path>` -- Returns `1` if node exists, `0` otherwise
-   `set <path> <value> [version]` -- Updates the node's value. Only updates if version matches (default: -1)
-   `create <path> <value> [mode]` -- Creates new node with the set value
-   `touch <path>` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists
-   `get <path>` -- Returns the node's value
-   `rm <path> [version]` -- Removes the node only if version matches (default: -1)
-   `rmr <path>` -- Recursively deletes path. Confirmation required
+-   `ls '[path]'` -- Lists the nodes for the given path (default: cwd)
+-   `cd '[path]'` -- Changes the working path (default `.`)
+-   `exists '<path>'` -- Returns `1` if node exists, `0` otherwise
+-   `set '<path>' <value> [version]` -- Updates the node's value. Only updates if version matches (default: -1)
+-   `create '<path>' <value> [mode]` -- Creates new node with the set value
+-   `touch '<path>'` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists
+-   `get '<path>'` -- Returns the node's value
+-   `rm '<path>' [version]` -- Removes the node only if version matches (default: -1)
+-   `rmr '<path>'` -- Recursively deletes path. Confirmation required
 -   `flwc <command>` -- Executes four-letter-word command
 -   `help` -- Prints this message
-   `get_direct_children_number [path]` -- Get numbers of direct children nodes under a specific path
-   `get_all_children_number [path]` -- Get all numbers of children nodes under a specific path
-   `get_stat [path]` -- Returns the node's stat (default `.`)
-   `find_super_nodes <threshold> [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
+-   `get_direct_children_number '[path]'` -- Get numbers of direct children nodes under a specific path
+-   `get_all_children_number '[path]'` -- Get all numbers of children nodes under a specific path
+-   `get_stat '[path]'` -- Returns the node's stat (default `.`)
+-   `find_super_nodes <threshold> '[path]'` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
 -   `delete_stale_backups` -- Deletes ClickHouse nodes used for backups that are now inactive
 -   `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)
-   `sync <path>` -- Synchronizes node between processes and leader
+-   `sync '<path>'` -- Synchronizes node between processes and leader
 -   `reconfig <add|remove|set> "<arg>" [version]` -- Reconfigure Keeper cluster. See https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#reconfiguration
--- a/docs/en/sql-reference/aggregate-functions/reference/groupconcat.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupconcat.md
@ -10,7 +10,7 @@ Calculates a concatenated string from a group of strings, optionally separated b
 **Syntax**

 ``` sql
-groupConcat(expression [, delimiter] [, limit]);
+groupConcat[(delimiter [, limit])](expression);
 ```

 **Arguments**
@ -20,7 +20,7 @@ groupConcat(expression [, delimiter] [, limit]);
 - `limit` — A positive [integer](../../../sql-reference/data-types/int-uint.md) specifying the maximum number of elements to concatenate. If more elements are present, excess elements are ignored. This parameter is optional.

 :::note
-If delimiter is specified without limit, it must be the first parameter following the expression. If both delimiter and limit are specified, delimiter must precede limit.
+If delimiter is specified without limit, it must be the first parameter. If both delimiter and limit are specified, delimiter must precede limit.
 :::

 **Returned value**
@ -61,7 +61,7 @@ This concatenates all names into one continuous string without any separator.
 Query:

 ``` sql
-SELECT groupConcat(Name, ', ', 2) FROM Employees;
+SELECT groupConcat(', ')(Name) FROM Employees;
 ```

 Result:
@ -78,7 +78,7 @@ This output shows the names separated by a comma followed by a space.
 Query:

 ``` sql
-SELECT groupConcat(Name, ', ', 2) FROM Employees;
+SELECT groupConcat(', ', 2)(Name) FROM Employees;
 ```

 Result:
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@ -3045,13 +3045,425 @@ toUInt256OrDefault('abc', CAST('0', 'UInt256')):  0
 - [`toUInt256OrZero`](#touint256orzero).
 - [`toUInt256OrNull`](#touint256ornull).

-## toFloat(32\|64)
+## toFloat32

-## toFloat(32\|64)OrZero
+Converts an input value to a value of type [`Float32`](../data-types/float.md). Throws an exception in case of an error.

-## toFloat(32\|64)OrNull
+**Syntax**

-## toFloat(32\|64)OrDefault
+```sql
+toFloat32(expr)
+```
+
+**Arguments**
+
+- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions).
+
+Supported arguments:
+- Values of type (U)Int8/16/32/64/128/256.
+- String representations of (U)Int8/16/32/128/256.
+- Values of type Float32/64, including `NaN` and `Inf`.
+- String representations of Float32/64, including `NaN` and `Inf` (case-insensitive).
+
+Unsupported arguments:
+- String representations of binary and hexadecimal values, e.g. `SELECT toFloat32('0xc0fe');`.
+
+**Returned value**
+
+- 32-bit floating point value. [Float32](../data-types/float.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toFloat32(42.7),
+    toFloat32('42.7'),
+    toFloat32('NaN')
+FORMAT Vertical;
+```
+
+Result:
+
+```response
+Row 1:
+──────
+toFloat32(42.7):   42.7
+toFloat32('42.7'): 42.7
+toFloat32('NaN'):  nan
+```
+
+**See also**
+
+- [`toFloat32OrZero`](#tofloat32orzero).
+- [`toFloat32OrNull`](#tofloat32ornull).
+- [`toFloat32OrDefault`](#tofloat32ordefault).
+
+## toFloat32OrZero
+
+Like [`toFloat32`](#tofloat32), this function converts an input value to a value of type [Float32](../data-types/float.md) but returns `0` in case of an error.
+
+**Syntax**
+
+```sql
+toFloat32OrZero(x)
+```
+
+**Arguments**
+
+- `x` — A String representation of a number. [String](../data-types/string.md).
+
+Supported arguments:
+- String representations of (U)Int8/16/32/128/256, Float32/64.
+
+Unsupported arguments (return `0`):
+- String representations of binary and hexadecimal values, e.g. `SELECT toFloat32OrZero('0xc0fe');`.
+
+**Returned value**
+
+- 32-bit Float value if successful, otherwise `0`. [Float32](../data-types/float.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toFloat32OrZero('42.7'),
+    toFloat32OrZero('abc')
+FORMAT Vertical;
+```
+
+Result:
+
+```response
+Row 1:
+──────
+toFloat32OrZero('42.7'): 42.7
+toFloat32OrZero('abc'):  0
+```
+
+**See also**
+
+- [`toFloat32`](#tofloat32).
+- [`toFloat32OrNull`](#tofloat32ornull).
+- [`toFloat32OrDefault`](#tofloat32ordefault).
+
+## toFloat32OrNull
+
+Like [`toFloat32`](#tofloat32), this function converts an input value to a value of type [Float32](../data-types/float.md) but returns `NULL` in case of an error.
+
+**Syntax**
+
+```sql
+toFloat32OrNull(x)
+```
+
+**Arguments**
+
+- `x` — A String representation of a number. [String](../data-types/string.md).
+
+Supported arguments:
+- String representations of (U)Int8/16/32/128/256, Float32/64.
+
+Unsupported arguments (return `\N`):
+- String representations of binary and hexadecimal values, e.g. `SELECT toFloat32OrNull('0xc0fe');`.
+
+**Returned value**
+
+- 32-bit Float value if successful, otherwise `\N`. [Float32](../data-types/float.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toFloat32OrNull('42.7'),
+    toFloat32OrNull('abc')
+FORMAT Vertical;
+```
+
+Result:
+
+```response
+Row 1:
+──────
+toFloat32OrNull('42.7'): 42.7
+toFloat32OrNull('abc'):  ᴺᵁᴸᴸ
+```
+
+**See also**
+
+- [`toFloat32`](#tofloat32).
+- [`toFloat32OrZero`](#tofloat32orzero).
+- [`toFloat32OrDefault`](#tofloat32ordefault).
+
+## toFloat32OrDefault
+
+Like [`toFloat32`](#tofloat32), this function converts an input value to a value of type [Float32](../data-types/float.md) but returns the default value in case of an error.
+If no `default` value is passed then `0` is returned in case of an error.
+
+**Syntax**
+
+```sql
+toFloat32OrDefault(expr[, default])
+```
+
+**Arguments**
+
+- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md).
+- `default` (optional) — The default value to return if parsing to type `Float32` is unsuccessful. [Float32](../data-types/float.md).
+
+Supported arguments:
+- Values of type (U)Int8/16/32/64/128/256.
+- String representations of (U)Int8/16/32/128/256.
+- Values of type Float32/64, including `NaN` and `Inf`.
+- String representations of Float32/64, including `NaN` and `Inf` (case-insensitive).
+
+Arguments for which the default value is returned:
+- String representations of binary and hexadecimal values, e.g. `SELECT toFloat32OrDefault('0xc0fe', CAST('0', 'Float32'));`.
+
+**Returned value**
+
+- 32-bit Float value if successful, otherwise returns the default value if passed or `0` if not. [Float32](../data-types/float.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toFloat32OrDefault('8', CAST('0', 'Float32')),
+    toFloat32OrDefault('abc', CAST('0', 'Float32'))
+FORMAT Vertical;
+```
+
+Result:
+
+```response
+Row 1:
+──────
+toFloat32OrDefault('8', CAST('0', 'Float32')):   8
+toFloat32OrDefault('abc', CAST('0', 'Float32')): 0
+```
+
+**See also**
+
+- [`toFloat32`](#tofloat32).
+- [`toFloat32OrZero`](#tofloat32orzero).
+- [`toFloat32OrNull`](#tofloat32ornull).
+
+## toFloat64
+
+Converts an input value to a value of type [`Float64`](../data-types/float.md). Throws an exception in case of an error.
+
+**Syntax**
+
+```sql
+toFloat64(expr)
+```
+
+**Arguments**
+
+- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions).
+
+Supported arguments:
+- Values of type (U)Int8/16/32/64/128/256.
+- String representations of (U)Int8/16/32/128/256.
+- Values of type Float32/64, including `NaN` and `Inf`.
+- String representations of Float32/64, including `NaN` and `Inf` (case-insensitive).
+
+Unsupported arguments:
+- String representations of binary and hexadecimal values, e.g. `SELECT toFloat64('0xc0fe');`.
+
+**Returned value**
+
+- 64-bit floating point value. [Float64](../data-types/float.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toFloat64(42.7),
+    toFloat64('42.7'),
+    toFloat64('NaN')
+FORMAT Vertical;
+```
+
+Result:
+
+```response
+Row 1:
+──────
+toFloat64(42.7):   42.7
+toFloat64('42.7'): 42.7
+toFloat64('NaN'):  nan
+```
+
+**See also**
+
+- [`toFloat64OrZero`](#tofloat64orzero).
+- [`toFloat64OrNull`](#tofloat64ornull).
+- [`toFloat64OrDefault`](#tofloat64ordefault).
+
+## toFloat64OrZero
+
+Like [`toFloat64`](#tofloat64), this function converts an input value to a value of type [Float64](../data-types/float.md) but returns `0` in case of an error.
+
+**Syntax**
+
+```sql
+toFloat64OrZero(x)
+```
+
+**Arguments**
+
+- `x` — A String representation of a number. [String](../data-types/string.md).
+
+Supported arguments:
+- String representations of (U)Int8/16/32/128/256, Float32/64.
+
+Unsupported arguments (return `0`):
+- String representations of binary and hexadecimal values, e.g. `SELECT toFloat64OrZero('0xc0fe');`.
+
+**Returned value**
+
+- 64-bit Float value if successful, otherwise `0`. [Float64](../data-types/float.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toFloat64OrZero('42.7'),
+    toFloat64OrZero('abc')
+FORMAT Vertical;
+```
+
+Result:
+
+```response
+Row 1:
+──────
+toFloat64OrZero('42.7'): 42.7
+toFloat64OrZero('abc'):  0
+```
+
+**See also**
+
+- [`toFloat64`](#tofloat64).
+- [`toFloat64OrNull`](#tofloat64ornull).
+- [`toFloat64OrDefault`](#tofloat64ordefault).
+
+## toFloat64OrNull
+
+Like [`toFloat64`](#tofloat64), this function converts an input value to a value of type [Float64](../data-types/float.md) but returns `NULL` in case of an error.
+
+**Syntax**
+
+```sql
+toFloat64OrNull(x)
+```
+
+**Arguments**
+
+- `x` — A String representation of a number. [String](../data-types/string.md).
+
+Supported arguments:
+- String representations of (U)Int8/16/32/128/256, Float32/64.
+
+Unsupported arguments (return `\N`):
+- String representations of binary and hexadecimal values, e.g. `SELECT toFloat64OrNull('0xc0fe');`.
+
+**Returned value**
+
+- 64-bit Float value if successful, otherwise `\N`. [Float64](../data-types/float.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toFloat64OrNull('42.7'),
+    toFloat64OrNull('abc')
+FORMAT Vertical;
+```
+
+Result:
+
+```response
+Row 1:
+──────
+toFloat64OrNull('42.7'): 42.7
+toFloat64OrNull('abc'):  ᴺᵁᴸᴸ
+```
+
+**See also**
+
+- [`toFloat64`](#tofloat64).
+- [`toFloat64OrZero`](#tofloat64orzero).
+- [`toFloat64OrDefault`](#tofloat64ordefault).
+
+## toFloat64OrDefault
+
+Like [`toFloat64`](#tofloat64), this function converts an input value to a value of type [Float64](../data-types/float.md) but returns the default value in case of an error.
+If no `default` value is passed then `0` is returned in case of an error.
+
+**Syntax**
+
+```sql
+toFloat64OrDefault(expr[, default])
+```
+
+**Arguments**
+
+- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md).
+- `default` (optional) — The default value to return if parsing to type `Float64` is unsuccessful. [Float64](../data-types/float.md).
+
+Supported arguments:
+- Values of type (U)Int8/16/32/64/128/256.
+- String representations of (U)Int8/16/32/128/256.
+- Values of type Float32/64, including `NaN` and `Inf`.
+- String representations of Float32/64, including `NaN` and `Inf` (case-insensitive).
+
+Arguments for which the default value is returned:
+- String representations of binary and hexadecimal values, e.g. `SELECT toFloat64OrDefault('0xc0fe', CAST('0', 'Float64'));`.
+
+**Returned value**
+
+- 64-bit Float value if successful, otherwise returns the default value if passed or `0` if not. [Float64](../data-types/float.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toFloat64OrDefault('8', CAST('0', 'Float64')),
+    toFloat64OrDefault('abc', CAST('0', 'Float64'))
+FORMAT Vertical;
+```
+
+Result:
+
+```response
+Row 1:
+──────
+toFloat64OrDefault('8', CAST('0', 'Float64')):   8
+toFloat64OrDefault('abc', CAST('0', 'Float64')): 0
+```
+
+**See also**
+
+- [`toFloat64`](#tofloat64).
+- [`toFloat64OrZero`](#tofloat64orzero).
+- [`toFloat64OrNull`](#tofloat64ornull).

 ## toDate

--- a/docs/en/sql-reference/statements/alter/partition.md
+++ b/docs/en/sql-reference/statements/alter/partition.md
@ -9,6 +9,7 @@ The following operations with [partitions](/docs/en/engines/table-engines/merget

 - [DETACH PARTITION\|PART](#detach-partitionpart) — Moves a partition or part to the `detached` directory and forget it.
 - [DROP PARTITION\|PART](#drop-partitionpart) — Deletes a partition or part.
+- [DROP DETACHED PARTITION\|PART](#drop-detached-partitionpart) — Deletes a part or all parts of a partition from `detached`.
 - [FORGET PARTITION](#forget-partition) — Deletes a partition metadata from zookeeper if it's empty.
 - [ATTACH PARTITION\|PART](#attach-partitionpart) — Adds a partition or part from the `detached` directory to the table.
 - [ATTACH PARTITION FROM](#attach-partition-from) — Copies the data partition from one table to another and adds.
@ -68,7 +69,7 @@ ALTER TABLE mt DROP PART 'all_4_4_0';
 ## DROP DETACHED PARTITION\|PART

 ``` sql
-ALTER TABLE table_name [ON CLUSTER cluster] DROP DETACHED PARTITION|PART partition_expr
+ALTER TABLE table_name [ON CLUSTER cluster] DROP DETACHED PARTITION|PART ALL|partition_expr
 ```

 Removes the specified part or all parts of the specified partition from `detached`.
--- a/docs/en/sql-reference/statements/create/table.md
+++ b/docs/en/sql-reference/statements/create/table.md
@ -241,12 +241,12 @@ CREATE OR REPLACE TABLE test
 (
    id UInt64,
    size_bytes Int64,
-    size String Alias formatReadableSize(size_bytes)
+    size String ALIAS formatReadableSize(size_bytes)
 )
 ENGINE = MergeTree
 ORDER BY id;

-INSERT INTO test Values (1, 4678899);
+INSERT INTO test VALUES (1, 4678899);

 SELECT id, size_bytes, size FROM test;
 ┌─id─┬─size_bytes─┬─size─────┐
@ -497,7 +497,7 @@ If you perform a SELECT query mentioning a specific value in an encrypted column
 ```sql
 CREATE TABLE mytable
 (
-    x String Codec(AES_128_GCM_SIV)
+    x String CODEC(AES_128_GCM_SIV)
 )
 ENGINE = MergeTree ORDER BY x;
 ```
--- a/docs/en/sql-reference/statements/delete.md
+++ b/docs/en/sql-reference/statements/delete.md
@ -36,9 +36,10 @@ If you anticipate frequent deletes, consider using a [custom partitioning key](/

 ## Limitations of lightweight `DELETE`

-### Lightweight `DELETE`s do not work with projections
+### Lightweight `DELETE`s with projections

-Currently, `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation and may require the projection to be rebuilt, negatively affecting `DELETE` performance.
+By default, `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation and may require the projection to be rebuilt, negatively affecting `DELETE` performance.
+However, there is an option to change this behavior. By setting `lightweight_mutation_projection_mode = 'drop'`, you can make deletes work with projections.

 ## Performance considerations when using lightweight `DELETE`

--- a/docs/en/sql-reference/table-functions/fuzzQuery.md
+++ b/docs/en/sql-reference/table-functions/fuzzQuery.md
@ -0,0 +1,36 @@
+---
+slug: /en/sql-reference/table-functions/fuzzQuery
+sidebar_position: 75
+sidebar_label: fuzzQuery
+---
+
+# fuzzQuery
+
+Perturbs the given query string with random variations.
+
+``` sql
+fuzzQuery(query[, max_query_length[, random_seed]])
+```
+
+**Arguments**
+
+- `query` (String) - The source query to perform the fuzzing on.
+- `max_query_length` (UInt64) - The maximum length the query can reach during the fuzzing process.
+- `random_seed` (UInt64) - A random seed for producing stable results.
+
+**Returned Value**
+
+A table object with a single column containing perturbed query strings.
+
+## Usage Example
+
+``` sql
+SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2;
+```
+
+```
+   ┌─query──────────────────────────────────────────────────────────┐
+1. │ SELECT 'a' AS key GROUP BY key                                 │
+2. │ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │
+   └────────────────────────────────────────────────────────────────┘
+```
--- a/docs/en/sql-reference/table-functions/hdfs.md
+++ b/docs/en/sql-reference/table-functions/hdfs.md
@ -44,6 +44,7 @@ LIMIT 2
 Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix.

 - `*` — Represents arbitrarily many characters except `/` but including the empty string.
+- `**` — Represents all files inside a folder recursively.
 - `?` — Represents an arbitrary single character.
 - `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. The strings can contain the `/` symbol.
 - `{N..M}` — Represents any number `>= N` and `<= M`.
--- a/docs/en/sql-reference/table-functions/timeSeriesData.md
+++ b/docs/en/sql-reference/table-functions/timeSeriesData.md
@ -0,0 +1,28 @@
+---
+slug: /en/sql-reference/table-functions/timeSeriesData
+sidebar_position: 145
+sidebar_label: timeSeriesData
+---
+
+# timeSeriesData
+
+`timeSeriesData(db_name.time_series_table)` - Returns the [data](../../engines/table-engines/integrations/time-series.md#data-table) table
+used by table `db_name.time_series_table` whose table engine is [TimeSeries](../../engines/table-engines/integrations/time-series.md):
+
+``` sql
+CREATE TABLE db_name.time_series_table ENGINE=TimeSeries DATA data_table
+```
+
+The function also works if the _data_ table is inner:
+
+``` sql
+CREATE TABLE db_name.time_series_table ENGINE=TimeSeries DATA INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
+```
+
+The following queries are equivalent:
+
+``` sql
+SELECT * FROM timeSeriesData(db_name.time_series_table);
+SELECT * FROM timeSeriesData('db_name.time_series_table');
+SELECT * FROM timeSeriesData('db_name', 'time_series_table');
+```
--- a/docs/en/sql-reference/table-functions/timeSeriesMetrics.md
+++ b/docs/en/sql-reference/table-functions/timeSeriesMetrics.md
@ -0,0 +1,28 @@
+---
+slug: /en/sql-reference/table-functions/timeSeriesMetrics
+sidebar_position: 145
+sidebar_label: timeSeriesMetrics
+---
+
+# timeSeriesMetrics
+
+`timeSeriesMetrics(db_name.time_series_table)` - Returns the [metrics](../../engines/table-engines/integrations/time-series.md#metrics-table) table
+used by table `db_name.time_series_table` whose table engine is [TimeSeries](../../engines/table-engines/integrations/time-series.md):
+
+``` sql
+CREATE TABLE db_name.time_series_table ENGINE=TimeSeries METRICS metrics_table
+```
+
+The function also works if the _metrics_ table is inner:
+
+``` sql
+CREATE TABLE db_name.time_series_table ENGINE=TimeSeries METRICS INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
+```
+
+The following queries are equivalent:
+
+``` sql
+SELECT * FROM timeSeriesMetrics(db_name.time_series_table);
+SELECT * FROM timeSeriesMetrics('db_name.time_series_table');
+SELECT * FROM timeSeriesMetrics('db_name', 'time_series_table');
+```
--- a/docs/en/sql-reference/table-functions/timeSeriesTags.md
+++ b/docs/en/sql-reference/table-functions/timeSeriesTags.md
@ -0,0 +1,28 @@
+---
+slug: /en/sql-reference/table-functions/timeSeriesTags
+sidebar_position: 145
+sidebar_label: timeSeriesTags
+---
+
+# timeSeriesTags
+
+`timeSeriesTags(db_name.time_series_table)` - Returns the [tags](../../engines/table-engines/integrations/time-series.md#tags-table) table
+used by table `db_name.time_series_table` whose table engine is [TimeSeries](../../engines/table-engines/integrations/time-series.md):
+
+``` sql
+CREATE TABLE db_name.time_series_table ENGINE=TimeSeries TAGS tags_table
+```
+
+The function also works if the _tags_ table is inner:
+
+``` sql
+CREATE TABLE db_name.time_series_table ENGINE=TimeSeries TAGS INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
+```
+
+The following queries are equivalent:
+
+``` sql
+SELECT * FROM timeSeriesTags(db_name.time_series_table);
+SELECT * FROM timeSeriesTags('db_name.time_series_table');
+SELECT * FROM timeSeriesTags('db_name', 'time_series_table');
+```
--- a/docs/en/sql-reference/window-functions/lagInFrame.md
+++ b/docs/en/sql-reference/window-functions/lagInFrame.md
@ -1,7 +1,7 @@
 ---
 slug: /en/sql-reference/window-functions/lagInFrame
 sidebar_label: lagInFrame
-sidebar_position: 8
+sidebar_position: 9
 ---

 # lagInFrame
--- a/docs/en/sql-reference/window-functions/leadInFrame.md
+++ b/docs/en/sql-reference/window-functions/leadInFrame.md
@ -1,7 +1,7 @@
 ---
 slug: /en/sql-reference/window-functions/leadInFrame
 sidebar_label: leadInFrame
-sidebar_position: 9
+sidebar_position: 10
 ---

 # leadInFrame
--- a/docs/en/sql-reference/window-functions/percent_rank.md
+++ b/docs/en/sql-reference/window-functions/percent_rank.md
@ -0,0 +1,72 @@
+---
+slug: /en/sql-reference/window-functions/percent_rank
+sidebar_label: percent_rank
+sidebar_position: 8
+---
+
+# percent_rank
+
+Returns the relative rank (i.e. percentile) of rows within a window partition.
+
+**Syntax**
+
+Alias: `percentRank` (case-sensitive)
+
+```sql
+percent_rank (column_name)
+  OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] 
+        [RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] | [window_name])
+FROM table_name
+WINDOW window_name as ([PARTITION BY grouping_column] [ORDER BY sorting_column] RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
+```
+
+The default and required window frame definition is `RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`.
+
+For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
+
+**Example**
+
+
+Query:
+
+```sql
+CREATE TABLE salaries
+(
+    `team` String,
+    `player` String,
+    `salary` UInt32,
+    `position` String
+)
+Engine = Memory;
+
+INSERT INTO salaries FORMAT Values
+    ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'),
+    ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
+    ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'),
+    ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'),
+    ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'),
+    ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'),
+    ('South Hampton Seagulls', 'James Henderson', 140000, 'M');
+```
+
+```sql
+SELECT player, salary, 
+       percent_rank() OVER (ORDER BY salary DESC) AS percent_rank
+FROM salaries;
+```
+
+Result:
+
+```response
+
+   ┌─player──────────┬─salary─┬───────percent_rank─┐
+1. │ Gary Chen       │ 195000 │                  0 │
+2. │ Robert George   │ 195000 │                  0 │
+3. │ Charles Juarez  │ 190000 │ 0.3333333333333333 │
+4. │ Michael Stanley │ 150000 │                0.5 │
+5. │ Scott Harrison  │ 150000 │                0.5 │
+6. │ Douglas Benson  │ 150000 │                0.5 │
+7. │ James Henderson │ 140000 │                  1 │
+   └─────────────────┴────────┴────────────────────┘
+
+```
--- a/docs/ru/sql-reference/table-functions/file.md
+++ b/docs/ru/sql-reference/table-functions/file.md
@ -81,6 +81,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
 Обрабатываться будут те и только те файлы, которые существуют в файловой системе и удовлетворяют всему шаблону пути.

 -   `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
+-   `**` — Заменяет любое количество любых символов, включая `/`, то есть осуществляет рекурсивный поиск по вложенным директориям.
 -   `?` — заменяет ровно один любой символ.
 -   `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. Эти строки также могут содержать символ `/`.
 -   `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
--- a/docs/ru/sql-reference/table-functions/hdfs.md
+++ b/docs/ru/sql-reference/table-functions/hdfs.md
@ -47,6 +47,7 @@ LIMIT 2


 -   `*` — Заменяет любое количество любых символов (кроме `/`), включая отсутствие символов.
+-   `**` — Заменяет любое количество любых символов, включая `/`, то есть осуществляет рекурсивный поиск по вложенным директориям.
 -   `?` — Заменяет ровно один любой символ.
 -   `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. Эти строки также могут содержать символ `/`.
 -   `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
--- a/programs/client/Client.h
+++ b/programs/client/Client.h
@ -11,7 +11,10 @@ class Client : public ClientApplicationBase
 public:
    using Arguments = ClientApplicationBase::Arguments;

-    Client() = default;
+    Client()
+    {
+        fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr); // Replace `fuzzer` with a randomly seeded one that is handed pointers to std::cout and std::cerr.
+    }

    void initialize(Poco::Util::Application & self) override;

--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@ -38,7 +38,7 @@
 #include <Server/HTTP/HTTPServer.h>
 #include <Server/HTTPHandlerFactory.h>
 #include <Server/KeeperReadinessHandler.h>
-#include <Server/PrometheusMetricsWriter.h>
+#include <Server/PrometheusRequestHandlerFactory.h>
 #include <Server/TCPServer.h>

 #include "Core/Defines.h"
@ -421,7 +421,7 @@ try
            std::lock_guard lock(servers_lock);
            metrics.reserve(servers->size());
            for (const auto & server : *servers)
-                metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()});
+                metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads(), server.refusedConnections()});
            return metrics;
        }
    );
@ -509,14 +509,13 @@ try
                auto address = socketBindListen(socket, listen_host, port);
                socket.setReceiveTimeout(my_http_context->getReceiveTimeout());
                socket.setSendTimeout(my_http_context->getSendTimeout());
-                auto metrics_writer = std::make_shared<KeeperPrometheusMetricsWriter>(config, "prometheus", async_metrics);
                servers->emplace_back(
                    listen_host,
                    port_name,
                    "Prometheus: http://" + address.toString(),
                    std::make_unique<HTTPServer>(
                        std::move(my_http_context),
-                        createPrometheusMainHandlerFactory(*this, config_getter(), metrics_writer, "PrometheusHandler-factory"),
+                        createKeeperPrometheusHandlerFactory(*this, config_getter(), async_metrics, "PrometheusHandler-factory"),
                        server_pool,
                        socket,
                        http_params));
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -814,10 +814,11 @@ try

    const size_t physical_server_memory = getMemoryAmount();

-    LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.",
+    LOG_INFO(log, "Available RAM: {}; logical cores: {}; used cores: {}.",
        formatReadableSizeWithBinarySuffix(physical_server_memory),
-        getNumberOfPhysicalCPUCores(),  // on ARM processors it can show only enabled at current moment cores
-        std::thread::hardware_concurrency());
+        std::thread::hardware_concurrency(),
+        getNumberOfPhysicalCPUCores()  // on ARM processors it can show only enabled at current moment cores
+        );

 #if defined(__x86_64__)
    String cpu_info;
@ -918,10 +919,10 @@ try
            metrics.reserve(servers_to_start_before_tables.size() + servers.size());

            for (const auto & server : servers_to_start_before_tables)
-                metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()});
+                metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads(), server.refusedConnections()});

            for (const auto & server : servers)
-                metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()});
+                metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads(), server.refusedConnections()});
            return metrics;
        }
    );
@ -1623,7 +1624,7 @@ try
                concurrent_threads_soft_limit = new_server_settings.concurrent_threads_soft_limit_num;
            if (new_server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0)
            {
-                auto value = new_server_settings.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency();
+                auto value = new_server_settings.concurrent_threads_soft_limit_ratio_to_cores * getNumberOfPhysicalCPUCores();
                if (value > 0 && value < concurrent_threads_soft_limit)
                    concurrent_threads_soft_limit = value;
            }
--- a/src/AggregateFunctions/WindowFunction.h
+++ b/src/AggregateFunctions/WindowFunction.h
@ -0,0 +1,117 @@
+#pragma once
+#include <AggregateFunctions/IAggregateFunction.h>
+#include <Interpreters/WindowDescription.h>
+#include <Common/AlignedBuffer.h>
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+extern const int BAD_ARGUMENTS;
+}
+class WindowTransform;
+
+
+/// Interface for true window functions. It is a loose contract: implementations
+/// are handed the WindowTransform itself and work directly with its internals.
+/// This is tolerated because there are only a few true window functions and the
+/// WindowTransform internals are well-defined in domain terms (e.g. frame
+/// boundaries).
+class IWindowFunction
+{
+public:
+    virtual ~IWindowFunction() = default;
+
+    /// Must insert the result for current_row.
+    virtual void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const = 0;
+
+    /// Default window frame of the function, if it has one.
+    /// The base implementation returns an empty optional.
+    virtual std::optional<WindowFrame> getDefaultFrame() const
+    {
+        return std::nullopt;
+    }
+
+    /// The base implementation ignores both arguments and returns a null column pointer.
+    virtual ColumnPtr castColumn(const Columns &, const std::vector<size_t> &)
+    {
+        return nullptr;
+    }
+
+    /// Is the frame type supported by this function.
+    /// The base implementation accepts every frame.
+    virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const
+    {
+        return true;
+    }
+};
+
+// Runtime data for computing one window function.
+struct WindowFunctionWorkspace
+{
+    AggregateFunctionPtr aggregate_function;
+
+    // Cached result of the aggregate function's isState() virtual method.
+    bool is_aggregate_function_state = false;
+
+    // This field is set for pure window functions. When set, we ignore the
+    // window_function.aggregate_function, and work through this interface
+    // instead.
+    IWindowFunction * window_function_impl = nullptr;
+
+    std::vector<size_t> argument_column_indices; // Presumably positions of the argument columns in the input block - TODO confirm.
+
+    // Will not be initialized for a pure window function.
+    mutable AlignedBuffer aggregate_function_state; // NOTE(review): StatefulWindowFunction::getState() reads this buffer, so stateful window functions appear to use it too - confirm.
+
+    // Argument columns. Be careful, this is a per-block cache.
+    std::vector<const IColumn *> argument_columns;
+    UInt64 cached_block_number = std::numeric_limits<UInt64>::max(); // Starts at the maximum UInt64 value - presumably meaning "no block cached yet"; confirm.
+};
+
+/// Basic implementation of a true window function. It derives from the
+/// aggregate function helper, but every aggregation entry point throws.
+struct WindowFunction : public IAggregateFunctionHelper<WindowFunction>, public IWindowFunction
+{
+    std::string name;
+
+    WindowFunction(
+        const std::string & name_,
+        const DataTypes & argument_types_,
+        const Array & parameters_,
+        const DataTypePtr & result_type_)
+        : IAggregateFunctionHelper<WindowFunction>(argument_types_, parameters_, result_type_)
+        , name(name_)
+    {
+    }
+
+    String getName() const override { return name; }
+
+    bool isOnlyWindowFunction() const override { return true; }
+
+    /// Throws BAD_ARGUMENTS stating that the function can only be used as a window function.
+    [[noreturn]] void fail() const
+    {
+        throw Exception(
+            ErrorCodes::BAD_ARGUMENTS,
+            "The function '{}' can only be used as a window function, not as an aggregate function",
+            getName());
+    }
+
+    /// No aggregation state here: zero size, nothing to construct or destroy.
+    size_t sizeOfData() const override { return 0; }
+    size_t alignOfData() const override { return 1; }
+    bool hasTrivialDestructor() const override { return true; }
+    void create(AggregateDataPtr __restrict) const override { }
+    void destroy(AggregateDataPtr __restrict) const noexcept override { }
+
+    /// Aggregate function entry points: all of them throw via fail().
+    void add(AggregateDataPtr __restrict, const IColumn **, size_t, Arena *) const override { fail(); }
+    void merge(AggregateDataPtr __restrict, ConstAggregateDataPtr, Arena *) const override { fail(); }
+    void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional<size_t>) const override { fail(); }
+    void deserialize(AggregateDataPtr __restrict, ReadBuffer &, std::optional<size_t>, Arena *) const override { fail(); }
+    void insertResultInto(AggregateDataPtr __restrict, IColumn &, Arena *) const override { fail(); }
+};
+
+/// A window function that keeps a `State` object in the workspace buffer
+/// (`WindowFunctionWorkspace::aggregate_function_state`).
+template <typename State>
+struct StatefulWindowFunction : public WindowFunction
+{
+    StatefulWindowFunction(
+        const std::string & name_, const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_)
+        : WindowFunction(name_, argument_types_, parameters_, result_type_)
+    {
+    }
+
+    /// Size and alignment of the storage in which create() constructs `State`.
+    /// The alignment has to be the one `State` requires: constructing an object
+    /// in insufficiently aligned memory is undefined behavior.
+    size_t sizeOfData() const override { return sizeof(State); }
+    size_t alignOfData() const override { return alignof(State); }
+
+    /// Value-initializes a `State` in place at `place`.
+    void create(AggregateDataPtr __restrict place) const override { new (place) State(); }
+
+    /// Runs the `State` destructor in place; the memory itself is not released here.
+    void destroy(AggregateDataPtr __restrict place) const noexcept override { reinterpret_cast<State *>(place)->~State(); }
+
+    bool hasTrivialDestructor() const override { return std::is_trivially_destructible_v<State>; }
+
+    /// Returns the `State` located at the start of `workspace.aggregate_function_state`.
+    State & getState(const WindowFunctionWorkspace & workspace) const
+    {
+        return *reinterpret_cast<State *>(workspace.aggregate_function_state.data());
+    }
+};
+
+}
--- a/src/Analyzer/ConstantNode.cpp
+++ b/src/Analyzer/ConstantNode.cpp
@ -5,6 +5,7 @@
 #include <Common/assert_cast.h>
 #include <Common/FieldVisitorToString.h>
 #include <Common/SipHash.h>
+#include <DataTypes/DataTypeDateTime64.h>

 #include <IO/WriteBuffer.h>
 #include <IO/WriteHelpers.h>
@ -162,6 +163,7 @@ QueryTreeNodePtr ConstantNode::cloneImpl() const
 ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const
 {
    const auto & constant_value_literal = constant_value->getValue();
+    const auto & constant_value_type = constant_value->getType();
    auto constant_value_ast = std::make_shared<ASTLiteral>(constant_value_literal);

    if (!options.add_cast_for_constants)
@ -169,7 +171,25 @@ ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const

    if (requiresCastCall())
    {
-        auto constant_type_name_ast = std::make_shared<ASTLiteral>(constant_value->getType()->getName());
+        /** Value for DateTime64 is Decimal64, which is serialized as a string literal.
+          * If we serialize it as is, DateTime64 would be parsed from that string literal, which can be incorrect.
+          * For example, DateTime64 cannot be parsed from the short value, like '1', while it's a valid Decimal64 value.
+          * It could also lead to ambiguous parsing because we don't know if the string literal represents a date or a Decimal64 literal.
+          * For this reason, we use a string literal representing a date instead of a Decimal64 literal.
+          */
+        if (WhichDataType(constant_value_type->getTypeId()).isDateTime64())
+        {
+            const auto * date_time_type = typeid_cast<const DataTypeDateTime64 *>(constant_value_type.get());
+            DecimalField<Decimal64> decimal_value;
+            if (constant_value_literal.tryGet<DecimalField<Decimal64>>(decimal_value))
+            {
+                WriteBufferFromOwnString ostr;
+                writeDateTimeText(decimal_value.getValue(), date_time_type->getScale(), ostr, date_time_type->getTimeZone());
+                constant_value_ast = std::make_shared<ASTLiteral>(ostr.str());
+            }
+        }
+
+        auto constant_type_name_ast = std::make_shared<ASTLiteral>(constant_value_type->getName());
        return makeASTFunction("_CAST", std::move(constant_value_ast), std::move(constant_type_name_ast));
    }

--- a/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp
+++ b/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp
@ -46,7 +46,7 @@ public:
            return;

        const auto & storage_snapshot = table_node ? table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot();
-        if (!storage->isVirtualColumn(column.name, storage_snapshot->getMetadataForQuery()))
+        if (!storage->isVirtualColumn(column.name, storage_snapshot->metadata))
            return;

        auto function_node = std::make_shared<FunctionNode>("shardNum");
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -78,6 +78,7 @@ add_headers_and_sources(clickhouse_common_io Common/Scheduler)
 add_headers_and_sources(clickhouse_common_io Common/Scheduler/Nodes)
 add_headers_and_sources(clickhouse_common_io IO)
 add_headers_and_sources(clickhouse_common_io IO/Archives)
+add_headers_and_sources(clickhouse_common_io IO/Protobuf)
 add_headers_and_sources(clickhouse_common_io IO/S3)
 add_headers_and_sources(clickhouse_common_io IO/AzureBlobStorage)
 list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp)
@ -225,6 +226,7 @@ add_object_library(clickhouse_storages_liveview Storages/LiveView)
 add_object_library(clickhouse_storages_windowview Storages/WindowView)
 add_object_library(clickhouse_storages_s3queue Storages/ObjectStorageQueue)
 add_object_library(clickhouse_storages_materializedview Storages/MaterializedView)
+add_object_library(clickhouse_storages_time_series Storages/TimeSeries)
 add_object_library(clickhouse_client Client)
 # Always compile this file with the highest possible level of optimizations, even in Debug builds.
 # https://github.com/ClickHouse/ClickHouse/issues/65745
@ -469,6 +471,7 @@ dbms_target_link_libraries (PUBLIC ch_contrib::sparsehash)

 if (TARGET ch_contrib::protobuf)
    dbms_target_link_libraries (PRIVATE ch_contrib::protobuf)
+    target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::protobuf)
 endif ()

 if (TARGET clickhouse_grpc_protos)
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@ -477,6 +477,8 @@ void ClientBase::onProfileInfo(const ProfileInfo & profile_info)
 {
    if (profile_info.hasAppliedLimit() && output_format)
        output_format->setRowsBeforeLimit(profile_info.getRowsBeforeLimit());
+    if (profile_info.hasAppliedAggregation() && output_format)
+        output_format->setRowsBeforeAggregation(profile_info.getRowsBeforeAggregation());
 }


--- a/src/Client/ClientBase.h
+++ b/src/Client/ClientBase.h
@ -2,7 +2,7 @@


 #include <Client/Suggest.h>
-#include <Client/QueryFuzzer.h>
+#include <Common/QueryFuzzer.h>
 #include <Common/DNSResolver.h>
 #include <Common/InterruptListener.h>
 #include <Common/ProgressIndication.h>
--- a/src/Client/Connection.cpp
+++ b/src/Client/Connection.cpp
@ -1319,7 +1319,7 @@ Progress Connection::receiveProgress() const
 ProfileInfo Connection::receiveProfileInfo() const
 {
    ProfileInfo profile_info;
-    profile_info.read(*in);
+    profile_info.read(*in, server_revision);
    return profile_info;
 }

--- a/src/Client/LocalConnection.cpp
+++ b/src/Client/LocalConnection.cpp
@ -365,7 +365,7 @@ bool LocalConnection::poll(size_t)
        {
            while (pollImpl())
            {
-                LOG_DEBUG(&Poco::Logger::get("LocalConnection"), "Executor timeout encountered, will retry");
+                LOG_TEST(&Poco::Logger::get("LocalConnection"), "Executor timeout encountered, will retry");

                if (needSendProgressOrMetrics())
                    return true;
--- a/src/Common/AsynchronousMetrics.cpp
+++ b/src/Common/AsynchronousMetrics.cpp
@ -1613,7 +1613,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
 #endif

    {
-        auto get_metric_name_doc = [](const String & name) -> std::pair<const char *, const char *>
+        auto threads_get_metric_name_doc = [](const String & name) -> std::pair<const char *, const char *>
        {
            static std::map<String, std::pair<const char *, const char *>> metric_map =
            {
@ -1637,11 +1637,38 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
                return it->second;
        };

+        auto rejected_connections_get_metric_name_doc = [](const String & name) -> std::pair<const char *, const char *>
+        {
+            static std::map<String, std::pair<const char *, const char *>> metric_map =
+                {
+                    {"tcp_port", {"TCPRejectedConnections", "Number of rejected connections for the TCP protocol (without TLS)."}},
+                    {"tcp_port_secure", {"TCPSecureRejectedConnections", "Number of rejected connections for the TCP protocol (with TLS)."}},
+                    {"http_port", {"HTTPRejectedConnections", "Number of rejected connections for the HTTP interface (without TLS)."}},
+                    {"https_port", {"HTTPSecureRejectedConnections", "Number of rejected connections for the HTTPS interface."}},
+                    {"interserver_http_port", {"InterserverRejectedConnections", "Number of rejected connections for the replicas communication protocol (without TLS)."}},
+                    {"interserver_https_port", {"InterserverSecureRejectedConnections", "Number of rejected connections for the replicas communication protocol (with TLS)."}},
+                    {"mysql_port", {"MySQLRejectedConnections", "Number of rejected connections for the MySQL compatibility protocol."}},
+                    {"postgresql_port", {"PostgreSQLRejectedConnections", "Number of rejected connections for the PostgreSQL compatibility protocol."}},
+                    {"grpc_port", {"GRPCRejectedConnections", "Number of rejected connections for the GRPC protocol."}},
+                    {"prometheus.port", {"PrometheusRejectedConnections", "Number of rejected connections for the Prometheus endpoint. Note: prometheus endpoints can be also used via the usual HTTP/HTTPs ports."}},
+                    {"keeper_server.tcp_port", {"KeeperTCPRejectedConnections", "Number of rejected connections for the Keeper TCP protocol (without TLS)."}},
+                    {"keeper_server.tcp_port_secure", {"KeeperTCPSecureRejectedConnections", "Number of rejected connections for the Keeper TCP protocol (with TLS)."}}
+                };
+            auto it = metric_map.find(name);
+            if (it == metric_map.end())
+                return { nullptr, nullptr };
+            else
+                return it->second;
+        };
+
        const auto server_metrics = protocol_server_metrics_func();
        for (const auto & server_metric : server_metrics)
        {
-            if (auto name_doc = get_metric_name_doc(server_metric.port_name); name_doc.first != nullptr)
+            if (auto name_doc = threads_get_metric_name_doc(server_metric.port_name); name_doc.first != nullptr)
                new_values[name_doc.first] = { server_metric.current_threads, name_doc.second };
+
+            if (auto name_doc = rejected_connections_get_metric_name_doc(server_metric.port_name); name_doc.first != nullptr)
+                new_values[name_doc.first] = { server_metric.rejected_connections, name_doc.second };
        }
    }

--- a/src/Common/AsynchronousMetrics.h
+++ b/src/Common/AsynchronousMetrics.h
@ -42,6 +42,7 @@ struct ProtocolServerMetrics
 {
    String port_name;
    size_t current_threads;
+    size_t rejected_connections;
 };

 /** Periodically (by default, each second)
--- a/src/Common/ConcurrentBoundedQueue.h
+++ b/src/Common/ConcurrentBoundedQueue.h
@ -243,7 +243,7 @@ public:
    }

    /// Clear and finish queue
-    void clearAndFinish()
+    void clearAndFinish() noexcept
    {
        {
            std::lock_guard lock(queue_mutex);
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@ -604,6 +604,10 @@
    M(723, PARQUET_EXCEPTION) \
    M(724, TOO_MANY_TABLES) \
    M(725, TOO_MANY_DATABASES) \
+    M(726, UNEXPECTED_HTTP_HEADERS) \
+    M(727, UNEXPECTED_TABLE_ENGINE) \
+    M(728, UNEXPECTED_DATA_TYPE) \
+    M(729, ILLEGAL_TIME_SERIES_TAGS) \
    \
    M(900, DISTRIBUTED_CACHE_ERROR) \
    M(901, CANNOT_USE_DISTRIBUTED_CACHE) \
--- a/src/Common/ErrorHandlers.h
+++ b/src/Common/ErrorHandlers.h
@ -2,6 +2,7 @@

 #include <Poco/ErrorHandler.h>
 #include <Common/Exception.h>
+#include <Common/logger_useful.h>


 /** ErrorHandler for Poco::Thread,
@ -26,8 +27,32 @@ public:
    void exception(const std::exception &)  override { logException(); }
    void exception()                        override { logException(); }

+    /// Writes `msg` to `trace_log` at the level that corresponds to `priority`.
+    /// FATAL and CRITICAL are both logged as fatal, NOTICE and INFORMATION both as info.
+    void logMessageImpl(Poco::Message::Priority priority, const std::string & msg) override
+    {
+        switch (priority)
+        {
+            case Poco::Message::PRIO_FATAL:
+            case Poco::Message::PRIO_CRITICAL:
+                LOG_FATAL(trace_log, fmt::runtime(msg));
+                break;
+
+            case Poco::Message::PRIO_ERROR:
+                LOG_ERROR(trace_log, fmt::runtime(msg));
+                break;
+
+            case Poco::Message::PRIO_WARNING:
+                LOG_WARNING(trace_log, fmt::runtime(msg));
+                break;
+
+            case Poco::Message::PRIO_NOTICE:
+            case Poco::Message::PRIO_INFORMATION:
+                LOG_INFO(trace_log, fmt::runtime(msg));
+                break;
+
+            case Poco::Message::PRIO_DEBUG:
+                LOG_DEBUG(trace_log, fmt::runtime(msg));
+                break;
+
+            case Poco::Message::PRIO_TRACE:
+                LOG_TRACE(trace_log, fmt::runtime(msg));
+                break;
+
+            case Poco::Message::PRIO_TEST:
+                LOG_TEST(trace_log, fmt::runtime(msg));
+                break;
+        }
+    }
+
 private:
    LoggerPtr log = getLogger("ServerErrorHandler");
+    LoggerPtr trace_log = getLogger("Poco");

    void logException()
    {
--- a/src/Common/FailPoint.cpp
+++ b/src/Common/FailPoint.cpp
@ -60,6 +60,7 @@ static struct InitFiu
    ONCE(receive_timeout_on_table_status_response) \
    REGULAR(keepermap_fail_drop_data) \
    REGULAR(lazy_pipe_fds_fail_close) \
+    PAUSEABLE(infinite_sleep) \


 namespace FailPoints
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@ -459,6 +459,7 @@ The server successfully detected this situation and will download merged part fr
    M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \
    M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \
    M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.") \
+    M(AzureCreateContainer, "Number of Azure blob storage API CreateContainer calls.") \
    \
    M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \
    M(DiskAzureUpload, "Number of Disk Azure blob storage API Upload calls") \
@ -466,8 +467,9 @@ The server successfully detected this situation and will download merged part fr
    M(DiskAzureCommitBlockList, "Number of Disk Azure blob storage API CommitBlockList calls") \
    M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \
    M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \
-    M(DiskAzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \
+    M(DiskAzureDeleteObjects, "Number of Disk Azure blob storage API DeleteObject(s) calls.") \
    M(DiskAzureGetProperties, "Number of Disk Azure blob storage API GetProperties calls.") \
+    M(DiskAzureCreateContainer, "Number of Disk Azure blob storage API CreateContainer calls.") \
    \
    M(ReadBufferFromAzureMicroseconds, "Time spent on reading from Azure.") \
    M(ReadBufferFromAzureInitMicroseconds, "Time spent initializing connection to Azure.") \
--- a/src/Common/QueryFuzzer.cpp
+++ b/src/Common/QueryFuzzer.cpp
@ -68,22 +68,21 @@ Field QueryFuzzer::getRandomField(int type)
    {
    case 0:
    {
-        return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values)
-                / sizeof(*bad_int64_values))];
+        return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)];
    }
    case 1:
    {
        static constexpr double values[]
                = {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999,
                   1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20,
-                  FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))];
+                  FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)];
    }
    case 2:
    {
        static constexpr UInt64 scales[] = {0, 1, 2, 10};
        return DecimalField<Decimal64>(
-            bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))],
-            static_cast<UInt32>(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))])
+            bad_int64_values[fuzz_rand() % std::size(bad_int64_values)],
+            static_cast<UInt32>(scales[fuzz_rand() % std::size(scales)])
        );
    }
    default:
@ -165,7 +164,8 @@ Field QueryFuzzer::fuzzField(Field field)
        {
            size_t pos = fuzz_rand() % arr.size();
            arr.erase(arr.begin() + pos);
-            std::cerr << "erased\n";
+            if (debug_stream)
+                *debug_stream << "erased\n";
        }

        if (fuzz_rand() % 5 == 0)
@ -174,12 +174,14 @@ Field QueryFuzzer::fuzzField(Field field)
            {
                size_t pos = fuzz_rand() % arr.size();
                arr.insert(arr.begin() + pos, fuzzField(arr[pos]));
-                std::cerr << fmt::format("inserted (pos {})\n", pos);
+                if (debug_stream)
+                    *debug_stream << fmt::format("inserted (pos {})\n", pos);
            }
            else
            {
                arr.insert(arr.begin(), getRandomField(0));
-                std::cerr << "inserted (0)\n";
+                if (debug_stream)
+                    *debug_stream << "inserted (0)\n";
            }

        }
@ -197,7 +199,9 @@ Field QueryFuzzer::fuzzField(Field field)
        {
            size_t pos = fuzz_rand() % arr.size();
            arr.erase(arr.begin() + pos);
-            std::cerr << "erased\n";
+
+            if (debug_stream)
+                *debug_stream << "erased\n";
        }

        if (fuzz_rand() % 5 == 0)
@ -206,12 +210,16 @@ Field QueryFuzzer::fuzzField(Field field)
            {
                size_t pos = fuzz_rand() % arr.size();
                arr.insert(arr.begin() + pos, fuzzField(arr[pos]));
-                std::cerr << fmt::format("inserted (pos {})\n", pos);
+
+                if (debug_stream)
+                    *debug_stream << fmt::format("inserted (pos {})\n", pos);
            }
            else
            {
                arr.insert(arr.begin(), getRandomField(0));
-                std::cerr << "inserted (0)\n";
+
+                if (debug_stream)
+                    *debug_stream << "inserted (0)\n";
            }

        }
@ -344,7 +352,8 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast)
        }
        else
        {
-            std::cerr << "No random column.\n";
+            if (debug_stream)
+                *debug_stream << "No random column.\n";
        }
    }

@ -378,7 +387,8 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
        if (col)
            impl->children.insert(pos, col);
        else
-            std::cerr << "No random column.\n";
+            if (debug_stream)
+                *debug_stream << "No random column.\n";
    }

    // We don't have to recurse here to fuzz the children, this is handled by
@ -1361,11 +1371,15 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast)
    collectFuzzInfoMain(ast);
    fuzz(ast);

-    std::cout << std::endl;
-    WriteBufferFromOStream ast_buf(std::cout, 4096);
-    formatAST(*ast, ast_buf, false /*highlight*/);
-    ast_buf.finalize();
-    std::cout << std::endl << std::endl;
+    if (out_stream)
+    {
+        *out_stream << std::endl;
+
+        WriteBufferFromOStream ast_buf(*out_stream, 4096);
+        formatAST(*ast, ast_buf, false /*highlight*/);
+        ast_buf.finalize();
+        *out_stream << std::endl << std::endl;
+    }
 }

 }
--- a/src/Common/QueryFuzzer.h
+++ b/src/Common/QueryFuzzer.h
@ -35,9 +35,31 @@ struct ASTWindowDefinition;
 * queries, so you want to feed it a lot of queries to get some interesting mix
 * of them. Normally we feed SQL regression tests to it.
 */
-struct QueryFuzzer
+class QueryFuzzer
 {
-    pcg64 fuzz_rand{randomSeed()};
+public:
+    /// fuzz_rand_    - random generator used for fuzzing decisions (by default constructed from randomSeed()).
+    /// out_stream_   - if set, fuzzMain() writes the formatted fuzzed AST to it; nothing is printed when null.
+    /// debug_stream_ - if set, receives diagnostic messages such as "erased" or "No random column."; silent when null.
+    explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr)
+        : fuzz_rand(fuzz_rand_)
+        , out_stream(out_stream_)
+        , debug_stream(debug_stream_)
+    {
+    }
+
+    // This is the only function you have to call -- it will modify the passed
+    // ASTPtr to point to new AST with some random changes.
+    void fuzzMain(ASTPtr & ast);
+
+    ASTs getInsertQueriesForFuzzedTables(const String & full_query);
+    ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query);
+    void notifyQueryFailed(ASTPtr ast);
+
+    static bool isSuitableForFuzzing(const ASTCreateQuery & create);
+
+private:
+    pcg64 fuzz_rand;
+
+    std::ostream * out_stream = nullptr;
+    std::ostream * debug_stream = nullptr;

    // We add elements to expression lists with fixed probability. Some elements
    // are so large, that the expected number of elements we add to them is
@ -66,10 +88,6 @@ struct QueryFuzzer
    std::unordered_map<std::string, size_t> index_of_fuzzed_table;
    std::set<IAST::Hash> created_tables_hashes;

-    // This is the only function you have to call -- it will modify the passed
-    // ASTPtr to point to new AST with some random changes.
-    void fuzzMain(ASTPtr & ast);
-
    // Various helper functions follow, normally you shouldn't have to call them.
    Field getRandomField(int type);
    Field fuzzField(Field field);
@ -77,9 +95,6 @@ struct QueryFuzzer
    ASTPtr getRandomExpressionList();
    DataTypePtr fuzzDataType(DataTypePtr type);
    DataTypePtr getRandomType();
-    ASTs getInsertQueriesForFuzzedTables(const String & full_query);
-    ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query);
-    void notifyQueryFailed(ASTPtr ast);
    void replaceWithColumnLike(ASTPtr & ast);
    void replaceWithTableLike(ASTPtr & ast);
    void fuzzOrderByElement(ASTOrderByElement * elem);
@ -102,8 +117,6 @@ struct QueryFuzzer
    void addTableLike(ASTPtr ast);
    void addColumnLike(ASTPtr ast);
    void collectFuzzInfoRecurse(ASTPtr ast);
-
-    static bool isSuitableForFuzzing(const ASTCreateQuery & create);
 };

 }
--- a/src/Common/Scheduler/Nodes/tests/gtest_resource_class_fair.cpp
+++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_class_fair.cpp
@ -8,7 +8,9 @@ using namespace DB;

 using ResourceTest = ResourceTestClass;

-TEST(SchedulerFairPolicy, Factory)
+/// Tests disabled because of leaks in the tests themselves: https://github.com/ClickHouse/ClickHouse/issues/67678
+
+TEST(DISABLED_SchedulerFairPolicy, Factory)
 {
    ResourceTest t;

@ -17,7 +19,7 @@ TEST(SchedulerFairPolicy, Factory)
    EXPECT_TRUE(dynamic_cast<FairPolicy *>(fair.get()) != nullptr);
 }

-TEST(SchedulerFairPolicy, FairnessWeights)
+TEST(DISABLED_SchedulerFairPolicy, FairnessWeights)
 {
    ResourceTest t;

@ -41,7 +43,7 @@ TEST(SchedulerFairPolicy, FairnessWeights)
    t.consumed("B", 20);
 }

-TEST(SchedulerFairPolicy, Activation)
+TEST(DISABLED_SchedulerFairPolicy, Activation)
 {
    ResourceTest t;

@ -77,7 +79,7 @@ TEST(SchedulerFairPolicy, Activation)
    t.consumed("B", 10);
 }

-TEST(SchedulerFairPolicy, FairnessMaxMin)
+TEST(DISABLED_SchedulerFairPolicy, FairnessMaxMin)
 {
    ResourceTest t;

@ -101,7 +103,7 @@ TEST(SchedulerFairPolicy, FairnessMaxMin)
    t.consumed("A", 20);
 }

-TEST(SchedulerFairPolicy, HierarchicalFairness)
+TEST(DISABLED_SchedulerFairPolicy, HierarchicalFairness)
 {
    ResourceTest t;

--- a/src/Common/Scheduler/Nodes/tests/gtest_resource_class_priority.cpp
+++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_class_priority.cpp
@ -8,7 +8,9 @@ using namespace DB;

 using ResourceTest = ResourceTestClass;

-TEST(SchedulerPriorityPolicy, Factory)
+/// Tests disabled because of leaks in the tests themselves: https://github.com/ClickHouse/ClickHouse/issues/67678
+
+TEST(DISABLED_SchedulerPriorityPolicy, Factory)
 {
    ResourceTest t;

@ -17,7 +19,7 @@ TEST(SchedulerPriorityPolicy, Factory)
    EXPECT_TRUE(dynamic_cast<PriorityPolicy *>(prio.get()) != nullptr);
 }

-TEST(SchedulerPriorityPolicy, Priorities)
+TEST(DISABLED_SchedulerPriorityPolicy, Priorities)
 {
    ResourceTest t;

@ -51,7 +53,7 @@ TEST(SchedulerPriorityPolicy, Priorities)
    t.consumed("C", 0);
 }

-TEST(SchedulerPriorityPolicy, Activation)
+TEST(DISABLED_SchedulerPriorityPolicy, Activation)
 {
    ResourceTest t;

@ -92,7 +94,7 @@ TEST(SchedulerPriorityPolicy, Activation)
    t.consumed("C", 0);
 }

-TEST(SchedulerPriorityPolicy, SinglePriority)
+TEST(DISABLED_SchedulerPriorityPolicy, SinglePriority)
 {
    ResourceTest t;

--- a/src/Common/Scheduler/Nodes/tests/gtest_throttler_constraint.cpp
+++ b/src/Common/Scheduler/Nodes/tests/gtest_throttler_constraint.cpp
@ -10,7 +10,9 @@ using namespace DB;

 using ResourceTest = ResourceTestClass;

-TEST(SchedulerThrottlerConstraint, LeakyBucketConstraint)
+/// Tests disabled because of leaks in the tests themselves: https://github.com/ClickHouse/ClickHouse/issues/67678
+
+TEST(DISABLED_SchedulerThrottlerConstraint, LeakyBucketConstraint)
 {
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
@ -40,7 +42,7 @@ TEST(SchedulerThrottlerConstraint, LeakyBucketConstraint)
    t.consumed("A", 10);
 }

-TEST(SchedulerThrottlerConstraint, Unlimited)
+TEST(DISABLED_SchedulerThrottlerConstraint, Unlimited)
 {
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
@ -57,7 +59,7 @@ TEST(SchedulerThrottlerConstraint, Unlimited)
    }
 }

-TEST(SchedulerThrottlerConstraint, Pacing)
+TEST(DISABLED_SchedulerThrottlerConstraint, Pacing)
 {
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
@ -77,7 +79,7 @@ TEST(SchedulerThrottlerConstraint, Pacing)
    }
 }

-TEST(SchedulerThrottlerConstraint, BucketFilling)
+TEST(DISABLED_SchedulerThrottlerConstraint, BucketFilling)
 {
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
@ -111,7 +113,7 @@ TEST(SchedulerThrottlerConstraint, BucketFilling)
    t.consumed("A", 3);
 }

-TEST(SchedulerThrottlerConstraint, PeekAndAvgLimits)
+TEST(DISABLED_SchedulerThrottlerConstraint, PeekAndAvgLimits)
 {
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
@ -139,7 +141,7 @@ TEST(SchedulerThrottlerConstraint, PeekAndAvgLimits)
    }
 }

-TEST(SchedulerThrottlerConstraint, ThrottlerAndFairness)
+TEST(DISABLED_SchedulerThrottlerConstraint, ThrottlerAndFairness)
 {
    ResourceTest t;
    EventQueue::TimePoint start = std::chrono::system_clock::now();
--- a/src/Common/SignalHandlers.cpp
+++ b/src/Common/SignalHandlers.cpp
@ -629,6 +629,7 @@ void HandledSignals::setupTerminateHandler()
 void HandledSignals::setupCommonDeadlySignalHandlers()
 {
    /// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime.
+    /// NOTE: it is also used by the clickhouse-test wrapper.
    addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, true);

 #if defined(SANITIZER)
--- a/src/Common/getNumberOfPhysicalCPUCores.h
+++ b/src/Common/getNumberOfPhysicalCPUCores.h
@ -1,4 +1,5 @@
 #pragma once

 /// Get number of CPU cores without hyper-threading.
+/// The calculation respects possible cgroups limits.
 unsigned getNumberOfPhysicalCPUCores();
--- a/src/Common/tests/gtest_lsan.cpp
+++ b/src/Common/tests/gtest_lsan.cpp
@ -14,20 +14,21 @@
 /// because of broken getauxval() [1].
 ///
 ///   [1]: https://github.com/ClickHouse/ClickHouse/pull/33957
-TEST(Common, LSan)
+TEST(SanitizerDeathTest, LSan)
 {
-    int sanitizers_exit_code = 1;
-
-    ASSERT_EXIT({
-        std::thread leak_in_thread([]()
+    EXPECT_DEATH(
        {
-            void * leak = malloc(4096);
-            ASSERT_NE(leak, nullptr);
-        });
-        leak_in_thread.join();
+            std::thread leak_in_thread(
+                []()
+                {
+                    void * leak = malloc(4096);
+                    ASSERT_NE(leak, nullptr);
+                });
+            leak_in_thread.join();

-        __lsan_do_leak_check();
-    }, ::testing::ExitedWithCode(sanitizers_exit_code), ".*LeakSanitizer: detected memory leaks.*");
+            __lsan_do_leak_check();
+        },
+        ".*LeakSanitizer: detected memory leaks.*");
 }

 #endif
--- a/src/Compression/fuzzers/CMakeLists.txt
+++ b/src/Compression/fuzzers/CMakeLists.txt
@ -5,19 +5,19 @@
 # If you want really small size of the resulted binary, just link with fuzz_compression and clickhouse_common_io

 clickhouse_add_executable (compressed_buffer_fuzzer compressed_buffer_fuzzer.cpp)
-target_link_libraries (compressed_buffer_fuzzer PRIVATE dbms)
+target_link_libraries (compressed_buffer_fuzzer PRIVATE dbms clickhouse_functions)

 clickhouse_add_executable (lz4_decompress_fuzzer lz4_decompress_fuzzer.cpp)
-target_link_libraries (lz4_decompress_fuzzer PUBLIC dbms ch_contrib::lz4)
+target_link_libraries (lz4_decompress_fuzzer PUBLIC dbms ch_contrib::lz4 clickhouse_functions)

 clickhouse_add_executable (delta_decompress_fuzzer delta_decompress_fuzzer.cpp)
-target_link_libraries (delta_decompress_fuzzer PRIVATE dbms)
+target_link_libraries (delta_decompress_fuzzer PRIVATE dbms clickhouse_functions)

 clickhouse_add_executable (double_delta_decompress_fuzzer double_delta_decompress_fuzzer.cpp)
-target_link_libraries (double_delta_decompress_fuzzer PRIVATE dbms)
+target_link_libraries (double_delta_decompress_fuzzer PRIVATE dbms clickhouse_functions)

 clickhouse_add_executable (encrypted_decompress_fuzzer encrypted_decompress_fuzzer.cpp)
-target_link_libraries (encrypted_decompress_fuzzer PRIVATE dbms)
+target_link_libraries (encrypted_decompress_fuzzer PRIVATE dbms clickhouse_functions)

 clickhouse_add_executable (gcd_decompress_fuzzer gcd_decompress_fuzzer.cpp)
-target_link_libraries (gcd_decompress_fuzzer PRIVATE dbms)
+target_link_libraries (gcd_decompress_fuzzer PRIVATE dbms clickhouse_functions)
--- a/src/Core/ProtocolDefines.h
+++ b/src/Core/ProtocolDefines.h
@ -81,6 +81,8 @@ static constexpr auto DBMS_MIN_REVISION_WITH_TABLE_READ_ONLY_CHECK = 54467;

 static constexpr auto DBMS_MIN_REVISION_WITH_SYSTEM_KEYWORDS_TABLE = 54468;

+static constexpr auto DBMS_MIN_REVISION_WITH_ROWS_BEFORE_AGGREGATION = 54469;
+
 /// Version of ClickHouse TCP protocol.
 ///
 /// Should be incremented manually on protocol changes.
@ -88,6 +90,6 @@ static constexpr auto DBMS_MIN_REVISION_WITH_SYSTEM_KEYWORDS_TABLE = 54468;
 /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION,
 /// later is just a number for server version (one number instead of commit SHA)
 /// for simplicity (sometimes it may be more convenient in some use cases).
-static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54468;
+static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54469;

 }
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -690,6 +690,7 @@ class IColumn;
    M(UInt64, max_size_to_preallocate_for_joins, 100'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before join", 0) \
    \
    M(Bool, kafka_disable_num_consumers_limit, false, "Disable limit on kafka_num_consumers that depends on the number of available CPU cores", 0) \
+    M(Bool, allow_experimental_kafka_offsets_storage_in_keeper, false, "Allow experimental feature to store Kafka related offsets in ClickHouse Keeper. When enabled a ClickHouse Keeper path and replica name can be specified to the Kafka table engine. As a result instead of the regular Kafka engine, a new type of storage engine will be used that stores the committed offsets primarily in ClickHouse Keeper", 0) \
    M(Bool, enable_software_prefetch_in_aggregation, true, "Enable use of software prefetch in aggregation", 0) \
    M(Bool, allow_aggregate_partitions_independently, false, "Enable independent aggregation of partitions on separate threads when partition key suits group by key. Beneficial when number of partitions close to number of cores and partitions have roughly the same size", 0) \
    M(Bool, force_aggregate_partitions_independently, false, "Force the use of optimization when it is applicable, but heuristics decided not to use it", 0) \
@ -893,6 +894,8 @@ class IColumn;
    M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
    M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
    M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \
+    M(Bool, restore_replace_external_engines_to_null, false, "Replace all the external table engines to Null on restore. Useful for testing purposes", 0) \
+    M(Bool, restore_replace_external_table_functions_to_null, false, "Replace all table functions to Null on restore. Useful for testing purposes", 0) \
    \
    \
    /* ###################################### */ \
@ -903,6 +906,7 @@ class IColumn;
    M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
    M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
    M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
+    M(Bool, allow_experimental_time_series_table, false, "Allows experimental TimeSeries table engine", 0) \
    M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \
    M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \
    M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
@ -1242,6 +1246,7 @@ class IColumn;
    M(Bool, insert_distributed_one_random_shard, false, "If setting is enabled, inserting into distributed table will choose a random shard to write when there is no sharding key", 0) \
    \
    M(Bool, exact_rows_before_limit, false, "When enabled, ClickHouse will provide exact value for rows_before_limit_at_least statistic, but with the cost that the data before limit will have to be read completely", 0) \
+    M(Bool, rows_before_aggregation, false, "When enabled, ClickHouse will provide exact value for rows_before_aggregation statistic, represents the number of rows read before aggregation", 0) \
    M(UInt64, cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if there are joining expressions in the WHERE section. Values: 0 - no rewrite, 1 - apply if possible for comma/cross, 2 - force rewrite all comma joins, cross - if possible", 0) \
    \
    M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \
--- a/src/Core/SettingsChangesHistory.cpp
+++ b/src/Core/SettingsChangesHistory.cpp
@ -75,11 +75,16 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
    },
    {"24.8",
        {
+            {"rows_before_aggregation", false, false, "Provide exact value for rows_before_aggregation statistic, represents the number of rows read before aggregation"},
+            {"restore_replace_external_table_functions_to_null", false, false, "New setting."},
+            {"restore_replace_external_engines_to_null", false, false, "New setting."},
            {"input_format_json_max_depth", 1000000, 1000, "It was unlimited in previous versions, but that was unsafe."},
            {"merge_tree_min_bytes_per_task_for_remote_reading", 4194304, 2097152, "Value is unified with `filesystem_prefetch_min_bytes_for_single_read_task`"},
+            {"allow_experimental_kafka_offsets_storage_in_keeper", false, false, "Allow the usage of experimental Kafka storage engine that stores the committed offsets in ClickHouse Keeper"},
            {"allow_archive_path_syntax", true, true, "Added new setting to allow disabling archive path syntax."},
            {"input_format_try_infer_variants", false, false, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"},
            {"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."},
+            {"allow_experimental_time_series_table", false, false, "Added new setting to allow the TimeSeries table engine"},
            {"enable_analyzer", 1, 1, "Added an alias to a setting `allow_experimental_analyzer`."},
        }
    },
--- a/src/Core/SettingsEnums.cpp
+++ b/src/Core/SettingsEnums.cpp
@ -177,6 +177,11 @@ IMPLEMENT_SETTING_ENUM(LightweightMutationProjectionMode, ErrorCodes::BAD_ARGUME
    {{"throw", LightweightMutationProjectionMode::THROW},
     {"drop", LightweightMutationProjectionMode::DROP}})

+IMPLEMENT_SETTING_ENUM(DeduplicateMergeProjectionMode, ErrorCodes::BAD_ARGUMENTS,
+    {{"throw", DeduplicateMergeProjectionMode::THROW},
+     {"drop", DeduplicateMergeProjectionMode::DROP},
+     {"rebuild", DeduplicateMergeProjectionMode::REBUILD}})
+
 IMPLEMENT_SETTING_AUTO_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS)

 IMPLEMENT_SETTING_ENUM(ParquetVersion, ErrorCodes::BAD_ARGUMENTS,
--- a/src/Core/SettingsEnums.h
+++ b/src/Core/SettingsEnums.h
@ -315,6 +315,15 @@ enum class LightweightMutationProjectionMode : uint8_t

 DECLARE_SETTING_ENUM(LightweightMutationProjectionMode)

+/// Values of a setting enum; the user-visible names are "throw", "drop" and "rebuild".
+/// NOTE(review): presumably selects what happens to projections during deduplicating merges — confirm against the setting that uses it.
+enum class DeduplicateMergeProjectionMode : uint8_t
+{
+    THROW,
+    DROP,
+    REBUILD,
+};
+
+DECLARE_SETTING_ENUM(DeduplicateMergeProjectionMode)
+
 DECLARE_SETTING_ENUM(LocalFSReadMethod)

 enum class ObjectStorageQueueMode : uint8_t
--- a/src/DataTypes/DataTypeAggregateFunction.cpp
+++ b/src/DataTypes/DataTypeAggregateFunction.cpp
@ -33,6 +33,16 @@ namespace ErrorCodes
    extern const int LOGICAL_ERROR;
 }

+
+/// Stores the aggregate function together with its argument types, parameters and optional version.
+DataTypeAggregateFunction::DataTypeAggregateFunction(AggregateFunctionPtr function_, const DataTypes & argument_types_,
+                            const Array & parameters_, std::optional<size_t> version_)
+    : function(std::move(function_))
+    , argument_types(argument_types_)
+    , parameters(parameters_)
+    , version(version_)
+{
+}
+
 String DataTypeAggregateFunction::getFunctionName() const
 {
    return function->getName();
--- a/src/DataTypes/DataTypeAggregateFunction.h
+++ b/src/DataTypes/DataTypeAggregateFunction.h
@ -30,13 +30,7 @@ public:
    static constexpr bool is_parametric = true;

    DataTypeAggregateFunction(AggregateFunctionPtr function_, const DataTypes & argument_types_,
-                              const Array & parameters_, std::optional<size_t> version_ = std::nullopt)
-        : function(std::move(function_))
-        , argument_types(argument_types_)
-        , parameters(parameters_)
-        , version(version_)
-    {
-    }
+                              const Array & parameters_, std::optional<size_t> version_ = std::nullopt);

    size_t getVersion() const;

--- a/src/DataTypes/IDataType.cpp
+++ b/src/DataTypes/IDataType.cpp
@ -90,7 +90,9 @@ void IDataType::forEachSubcolumn(
            {
                auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len);
                auto subdata = ISerialization::createFromPath(subpath, prefix_len);
-                callback(subpath, name, subdata);
+                auto path_copy = subpath;
+                path_copy.resize(prefix_len);
+                callback(path_copy, name, subdata);
            }
            subpath[i].visited = true;
        }
--- a/src/DataTypes/ObjectUtils.cpp
+++ b/src/DataTypes/ObjectUtils.cpp
@ -8,6 +8,7 @@
 #include <DataTypes/DataTypeNothing.h>
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeMap.h>
+#include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeNested.h>
@ -66,6 +67,36 @@ DataTypePtr getBaseTypeOfArray(const DataTypePtr & type)
    return last_array ? last_array->getNestedType() : type;
 }

+DataTypePtr getBaseTypeOfArray(DataTypePtr type, const Names & tuple_elements)
+{
+    /// Array layers are always unwrapped. A Tuple is entered only while the next
+    /// requested element name exists in it; any other type ends the descent.
+    auto next_name = tuple_elements.begin();
+    for (;;)
+    {
+        if (const auto * array_type = typeid_cast<const DataTypeArray *>(type.get()))
+        {
+            type = array_type->getNestedType();
+            continue;
+        }
+
+        const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type.get());
+        if (!tuple_type || next_name == tuple_elements.end())
+            return type;
+
+        const auto element_pos = tuple_type->tryGetPositionByName(*next_name);
+        if (!element_pos)
+            return type;
+
+        type = tuple_type->getElement(*element_pos);
+        ++next_name;
+    }
+}
+
 ColumnPtr getBaseColumnOfArray(const ColumnPtr & column)
 {
    /// Get raw pointers to avoid extra copying of column pointers.
--- a/src/DataTypes/ObjectUtils.h
+++ b/src/DataTypes/ObjectUtils.h
@ -27,6 +27,9 @@ size_t getNumberOfDimensions(const IColumn & column);
 /// Returns type of scalars of Array of arbitrary dimensions.
 DataTypePtr getBaseTypeOfArray(const DataTypePtr & type);

+/// The same as above but takes into account Tuples of Nested.
+DataTypePtr getBaseTypeOfArray(DataTypePtr type, const Names & tuple_elements);
+
 /// Returns Array type with requested scalar type and number of dimensions.
 DataTypePtr createArrayOfType(DataTypePtr type, size_t num_dimensions);

--- a/src/DataTypes/Serializations/ISerialization.h
+++ b/src/DataTypes/Serializations/ISerialization.h
@ -195,7 +195,7 @@ public:
        /// Types of substreams that can have arbitrary name.
        static const std::set<Type> named_types;

-        Type type;
+        Type type = Type::Regular;

        /// The name of a variant element type.
        String variant_element_name;
@ -212,6 +212,7 @@ public:
        /// Flag, that may help to traverse substream paths.
        mutable bool visited = false;

+        Substream() = default;
        Substream(Type type_) : type(type_) {} /// NOLINT
        String toString() const;
    };
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@ -12,6 +12,7 @@
 #include <Common/ZooKeeper/KeeperException.h>
 #include <Common/ZooKeeper/Types.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
+#include <Common/ZooKeeper/IKeeper.h>
 #include <Common/PoolId.h>
 #include <Core/ServerSettings.h>
 #include <Core/Settings.h>
@ -338,9 +339,12 @@ ClusterPtr DatabaseReplicated::getClusterImpl(bool all_groups) const
    return std::make_shared<Cluster>(getContext()->getSettingsRef(), shards, params);
 }

-std::vector<UInt8> DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr & cluster_) const
+ReplicasInfo DatabaseReplicated::tryGetReplicasInfo(const ClusterPtr & cluster_) const
 {
    Strings paths;
+
+    paths.emplace_back(fs::path(zookeeper_path) / "max_log_ptr");
+
    const auto & addresses_with_failover = cluster_->getShardsAddresses();
    const auto & shards_info = cluster_->getShardsInfo();
    for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index)
@ -349,22 +353,50 @@ std::vector<UInt8> DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr
        {
            String full_name = getFullReplicaName(replica.database_shard_name, replica.database_replica_name);
            paths.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "active");
+            paths.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "log_ptr");
        }
    }

    try
    {
        auto current_zookeeper = getZooKeeper();
-        auto res = current_zookeeper->exists(paths);
+        auto zk_res = current_zookeeper->tryGet(paths);

-        std::vector<UInt8> statuses;
-        statuses.resize(paths.size());
+        auto max_log_ptr_zk = zk_res[0];
+        if (max_log_ptr_zk.error != Coordination::Error::ZOK)
+            throw Coordination::Exception(max_log_ptr_zk.error);

-        for (size_t i = 0; i < res.size(); ++i)
-            if (res[i].error == Coordination::Error::ZOK)
-                statuses[i] = 1;
+        UInt32 max_log_ptr = parse<UInt32>(max_log_ptr_zk.data);

-        return statuses;
+        ReplicasInfo replicas_info;
+        replicas_info.resize((zk_res.size() - 1) / 2);
+
+        size_t global_replica_index = 0;
+        for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index)
+        {
+            for (const auto & replica : addresses_with_failover[shard_index])
+            {
+                auto replica_active = zk_res[2 * global_replica_index + 1];
+                auto replica_log_ptr = zk_res[2 * global_replica_index + 2];
+
+                UInt64 recovery_time = 0;
+                {
+                    std::lock_guard lock(ddl_worker_mutex);
+                    if (replica.is_local && ddl_worker)
+                        recovery_time = ddl_worker->getCurrentInitializationDurationMs();
+                }
+
+                replicas_info[global_replica_index] = ReplicaInfo{
+                    .is_active = replica_active.error == Coordination::Error::ZOK,
+                    .replication_lag = replica_log_ptr.error != Coordination::Error::ZNONODE ? std::optional(max_log_ptr - parse<UInt32>(replica_log_ptr.data)) : std::nullopt,
+                    .recovery_time = recovery_time,
+                };
+
+                ++global_replica_index;
+            }
+        }
+
+        return replicas_info;
    }
    catch (...)
    {
@ -373,7 +405,6 @@ std::vector<UInt8> DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr
    }
 }

-
 void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref)
 {
    const auto & config_prefix = fmt::format("named_collections.{}", collection_name);
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@ -1,5 +1,7 @@
 #pragma once

+#include <optional>
+
 #include <Databases/DatabaseAtomic.h>
 #include <Databases/DatabaseReplicatedSettings.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
@ -17,6 +19,14 @@ using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
 class Cluster;
 using ClusterPtr = std::shared_ptr<Cluster>;

+/// Per-replica status produced by DatabaseReplicated::tryGetReplicasInfo().
+/// Members carry default initializers so that a default-constructed value is well defined.
+struct ReplicaInfo
+{
+    /// True when the replica's "active" node was read successfully from Keeper.
+    bool is_active = false;
+    /// max_log_ptr minus the replica's log_ptr; empty when the replica has no log_ptr node.
+    std::optional<UInt32> replication_lag;
+    /// Milliseconds the local replica's DDL worker has spent in its current initialization; 0 otherwise.
+    UInt64 recovery_time = 0;
+};
+using ReplicasInfo = std::vector<ReplicaInfo>;
+
 class DatabaseReplicated : public DatabaseAtomic
 {
 public:
@ -84,7 +94,7 @@ public:

    static void dropReplica(DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica, bool throw_if_noop);

-    std::vector<UInt8> tryGetAreReplicasActive(const ClusterPtr & cluster_) const;
+    ReplicasInfo tryGetReplicasInfo(const ClusterPtr & cluster_) const;

    void renameDatabase(ContextPtr query_context, const String & new_name) override;

--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@ -32,6 +32,12 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db

 bool DatabaseReplicatedDDLWorker::initializeMainThread()
 {
+    { /// Start the initialization timer; getCurrentInitializationDurationMs() reads it under the same mutex.
+        std::lock_guard lock(initialization_duration_timer_mutex);
+        initialization_duration_timer.emplace(); /// Replaces any previous timer with a fresh Stopwatch.
+        initialization_duration_timer->start();
+    }
+
    while (!stop_flag)
    {
        try
@ -69,6 +75,10 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread()

            initializeReplication();
            initialized = true;
+            { /// Initialization succeeded: clear the timer so the reported duration goes back to 0.
+                std::lock_guard lock(initialization_duration_timer_mutex);
+                initialization_duration_timer.reset();
+            }
            return true;
        }
        catch (...)
@ -78,6 +88,11 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread()
        }
    }

+    { /// Reached once the retry loop ends without success: clear the timer before returning false.
+        std::lock_guard lock(initialization_duration_timer_mutex);
+        initialization_duration_timer.reset();
+    }
+
    return false;
 }

@ -459,4 +474,10 @@ UInt32 DatabaseReplicatedDDLWorker::getLogPointer() const
    return max_id.load();
 }

+UInt64 DatabaseReplicatedDDLWorker::getCurrentInitializationDurationMs() const /// Milliseconds elapsed on the initialization timer, or 0 when no timer is set.
+{
+    std::lock_guard lock(initialization_duration_timer_mutex); /// initializeMainThread() emplaces/resets the timer under this same mutex.
+    return initialization_duration_timer ? initialization_duration_timer->elapsedMilliseconds() : 0; /// Empty optional means initializeMainThread() is not in progress.
+}
+
 }
--- a/src/Databases/DatabaseReplicatedWorker.h
+++ b/src/Databases/DatabaseReplicatedWorker.h
@ -36,6 +36,8 @@ public:
                                   DatabaseReplicated * const database, bool committed = false); /// NOLINT

    UInt32 getLogPointer() const;
+
+    UInt64 getCurrentInitializationDurationMs() const; /// Elapsed milliseconds of the running initialization timer; 0 when the timer is not set.
 private:
    bool initializeMainThread() override;
    void initializeReplication();
@ -56,6 +58,9 @@ private:
    ZooKeeperPtr active_node_holder_zookeeper;
    /// It will remove "active" node when database is detached
    zkutil::EphemeralNodeHolderPtr active_node_holder;
+
+    std::optional<Stopwatch> initialization_duration_timer; /// Engaged while initializeMainThread() is running; reset on success or when its loop ends.
+    mutable std::mutex initialization_duration_timer_mutex; /// Guards initialization_duration_timer; mutable so the const getter can lock it.
 };

 }
--- a/Show More
+++ b/Show More