Merge branch 'master' into allow-to-change-some-cache-settings-without-restart

Kseniia Sumarokova 2023-12-13 23:33:59 +01:00 committed by GitHub
commit 79db3c66df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
410 changed files with 8848 additions and 4514 deletions


@ -205,6 +205,12 @@ jobs:
with:
build_name: binary_amd64_compat
checkout_depth: 0
BuilderBinAmd64Musl:
needs: [DockerHubPush]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_amd64_musl
checkout_depth: 0
BuilderBinAarch64V80Compat:
needs: [DockerHubPush]
uses: ./.github/workflows/reusable_build.yml
@ -549,6 +555,27 @@ jobs:
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
##############################################################################################
########################### ClickBench #######################################################
##############################################################################################
ClickBenchAMD64:
needs: [BuilderDebRelease]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickBench (amd64)
runner_type: func-tester
run_command: |
cd "$REPO_COPY/tests/ci"
python3 clickbench.py "$CHECK_NAME"
ClickBenchAarch64:
needs: [BuilderDebAarch64]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickBench (aarch64)
runner_type: func-tester-aarch64
run_command: |
cd "$REPO_COPY/tests/ci"
python3 clickbench.py "$CHECK_NAME"
##############################################################################################
######################################### STRESS TESTS #######################################
##############################################################################################
StressTestAsan:


@ -242,6 +242,11 @@ jobs:
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_amd64_compat
BuilderBinAmd64Musl:
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_amd64_musl
BuilderBinAarch64V80Compat:
needs: [FastTest, StyleCheck]
uses: ./.github/workflows/reusable_build.yml
@ -696,6 +701,27 @@ jobs:
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
##############################################################################################
########################### ClickBench #######################################################
##############################################################################################
ClickBenchAMD64:
needs: [BuilderDebRelease]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickBench (amd64)
runner_type: func-tester
run_command: |
cd "$REPO_COPY/tests/ci"
python3 clickbench.py "$CHECK_NAME"
ClickBenchAarch64:
needs: [BuilderDebAarch64]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickBench (aarch64)
runner_type: func-tester-aarch64
run_command: |
cd "$REPO_COPY/tests/ci"
python3 clickbench.py "$CHECK_NAME"
##############################################################################################
######################################### STRESS TESTS #######################################
##############################################################################################
StressTestAsan:


@ -30,7 +30,6 @@ int __gai_sigqueue(int sig, const union sigval val, pid_t caller_pid)
}
#include <sys/select.h>
#include <stdlib.h>
#include <features.h>


@ -55,7 +55,6 @@ set (SRCS
src/DigestStream.cpp
src/DirectoryIterator.cpp
src/DirectoryIteratorStrategy.cpp
src/DirectoryWatcher.cpp
src/Environment.cpp
src/Error.cpp
src/ErrorHandler.cpp


@ -1,228 +0,0 @@
//
// DirectoryWatcher.h
//
// Library: Foundation
// Package: Filesystem
// Module: DirectoryWatcher
//
// Definition of the DirectoryWatcher class.
//
// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// SPDX-License-Identifier: BSL-1.0
//
#ifndef Foundation_DirectoryWatcher_INCLUDED
#define Foundation_DirectoryWatcher_INCLUDED
#include "Poco/Foundation.h"
#ifndef POCO_NO_INOTIFY
# include "Poco/AtomicCounter.h"
# include "Poco/BasicEvent.h"
# include "Poco/File.h"
# include "Poco/Runnable.h"
# include "Poco/Thread.h"
namespace Poco
{
class DirectoryWatcherStrategy;
class Foundation_API DirectoryWatcher : protected Runnable
/// This class is used to get notifications about changes
/// to the filesystem, more specifically, to a specific
/// directory. Changes to a directory are reported via
/// events.
///
/// A thread will be created that watches the specified
/// directory for changes. Events are reported in the context
/// of this thread.
///
/// Note that changes to files in subdirectories of the watched
/// directory are not reported. Separate DirectoryWatcher objects
/// must be created for these directories if they should be watched.
///
/// Changes to file attributes are not reported.
///
/// On Windows, this class is implemented using FindFirstChangeNotification()/FindNextChangeNotification().
/// On Linux, this class is implemented using inotify.
/// On FreeBSD and Darwin (Mac OS X, iOS), this class uses kevent/kqueue.
/// On all other platforms, the watched directory is periodically scanned
/// for changes. This can negatively affect performance if done too often.
/// Therefore, the interval in which scans are done can be specified in
/// the constructor. Note that periodic scanning will also be done on FreeBSD
/// and Darwin if events for changes to files (DW_ITEM_MODIFIED) are enabled.
///
/// DW_ITEM_MOVED_FROM and DW_ITEM_MOVED_TO events will only be reported
/// on Linux. On other platforms, a file rename or move operation
/// will be reported via a DW_ITEM_REMOVED and a DW_ITEM_ADDED event.
/// The order of these two events is not defined.
///
/// An event mask can be specified to enable only certain events.
{
public:
enum DirectoryEventType
{
DW_ITEM_ADDED = 1,
/// A new item has been created and added to the directory.
DW_ITEM_REMOVED = 2,
/// An item has been removed from the directory.
DW_ITEM_MODIFIED = 4,
/// An item has been modified.
DW_ITEM_MOVED_FROM = 8,
/// An item has been renamed or moved. This event delivers the old name.
DW_ITEM_MOVED_TO = 16
/// An item has been renamed or moved. This event delivers the new name.
};
enum DirectoryEventMask
{
DW_FILTER_ENABLE_ALL = 31,
/// Enables all event types.
DW_FILTER_DISABLE_ALL = 0
/// Disables all event types.
};
enum
{
DW_DEFAULT_SCAN_INTERVAL = 5 /// Default scan interval for platforms that don't provide a native notification mechanism.
};
struct DirectoryEvent
{
DirectoryEvent(const File & f, DirectoryEventType ev) : item(f), event(ev) { }
const File & item; /// The directory or file that has been changed.
DirectoryEventType event; /// The kind of event.
};
BasicEvent<const DirectoryEvent> itemAdded;
/// Fired when a file or directory has been created or added to the directory.
BasicEvent<const DirectoryEvent> itemRemoved;
/// Fired when a file or directory has been removed from the directory.
BasicEvent<const DirectoryEvent> itemModified;
/// Fired when a file or directory has been modified.
BasicEvent<const DirectoryEvent> itemMovedFrom;
/// Fired when a file or directory has been renamed. This event delivers the old name.
BasicEvent<const DirectoryEvent> itemMovedTo;
/// Fired when a file or directory has been moved. This event delivers the new name.
BasicEvent<const Exception> scanError;
/// Fired when an error occurs while scanning for changes.
DirectoryWatcher(const std::string & path, int eventMask = DW_FILTER_ENABLE_ALL, int scanInterval = DW_DEFAULT_SCAN_INTERVAL);
/// Creates a DirectoryWatcher for the directory given in path.
/// To enable only specific events, an eventMask can be specified by
/// OR-ing the desired event IDs (e.g., DW_ITEM_ADDED | DW_ITEM_MODIFIED).
/// On platforms where no native filesystem notifications are available,
/// scanInterval specifies the interval in seconds between scans
/// of the directory.
DirectoryWatcher(const File & directory, int eventMask = DW_FILTER_ENABLE_ALL, int scanInterval = DW_DEFAULT_SCAN_INTERVAL);
/// Creates a DirectoryWatcher for the specified directory
/// To enable only specific events, an eventMask can be specified by
/// OR-ing the desired event IDs (e.g., DW_ITEM_ADDED | DW_ITEM_MODIFIED).
/// On platforms where no native filesystem notifications are available,
/// scanInterval specifies the interval in seconds between scans
/// of the directory.
~DirectoryWatcher();
/// Destroys the DirectoryWatcher.
void suspendEvents();
/// Suspends sending of events. Can be called multiple times, but every
/// call to suspendEvent() must be matched by a call to resumeEvents().
void resumeEvents();
/// Resumes events, after they have been suspended with a call to suspendEvents().
bool eventsSuspended() const;
/// Returns true iff events are suspended.
int eventMask() const;
/// Returns the value of the eventMask passed to the constructor.
int scanInterval() const;
/// Returns the scan interval in seconds.
const File & directory() const;
/// Returns the directory being watched.
bool supportsMoveEvents() const;
/// Returns true iff the platform supports DW_ITEM_MOVED_FROM/itemMovedFrom and
/// DW_ITEM_MOVED_TO/itemMovedTo events.
protected:
void init();
void stop();
void run();
private:
DirectoryWatcher();
DirectoryWatcher(const DirectoryWatcher &);
DirectoryWatcher & operator=(const DirectoryWatcher &);
Thread _thread;
File _directory;
int _eventMask;
AtomicCounter _eventsSuspended;
int _scanInterval;
DirectoryWatcherStrategy * _pStrategy;
};
//
// inlines
//
inline bool DirectoryWatcher::eventsSuspended() const
{
return _eventsSuspended.value() > 0;
}
inline int DirectoryWatcher::eventMask() const
{
return _eventMask;
}
inline int DirectoryWatcher::scanInterval() const
{
return _scanInterval;
}
inline const File & DirectoryWatcher::directory() const
{
return _directory;
}
} // namespace Poco
#endif // POCO_NO_INOTIFY
#endif // Foundation_DirectoryWatcher_INCLUDED
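For reference, a minimal usage sketch of the DirectoryWatcher API declared above (illustrative only; the handler type and watched path are hypothetical, and it assumes a stock Poco build where this header is still present):

```cpp
#include <iostream>
#include "Poco/Delegate.h"
#include "Poco/DirectoryWatcher.h"
#include "Poco/Thread.h"

// Hypothetical handler; BasicEvent callbacks receive the sender pointer and the event argument.
struct WatchHandler
{
    void onItemAdded(const void * /*sender*/, const Poco::DirectoryWatcher::DirectoryEvent & ev)
    {
        std::cout << "added: " << ev.item.path() << "\n";
    }
};

int main()
{
    WatchHandler handler;

    // Watch an existing directory for newly added items only; on platforms without
    // native notifications the default 5 s scan interval (DW_DEFAULT_SCAN_INTERVAL) applies.
    Poco::DirectoryWatcher watcher("/tmp/watched", Poco::DirectoryWatcher::DW_ITEM_ADDED);
    watcher.itemAdded += Poco::delegate(&handler, &WatchHandler::onItemAdded);

    Poco::Thread::sleep(60000); // events are delivered on the watcher's own thread
    watcher.itemAdded -= Poco::delegate(&handler, &WatchHandler::onItemAdded);
    return 0;
}
```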


@ -1,602 +0,0 @@
//
// DirectoryWatcher.cpp
//
// Library: Foundation
// Package: Filesystem
// Module: DirectoryWatcher
//
// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// SPDX-License-Identifier: BSL-1.0
//
#include "Poco/DirectoryWatcher.h"
#ifndef POCO_NO_INOTIFY
#include "Poco/Path.h"
#include "Poco/Glob.h"
#include "Poco/DirectoryIterator.h"
#include "Poco/Event.h"
#include "Poco/Exception.h"
#include "Poco/Buffer.h"
#if POCO_OS == POCO_OS_LINUX || POCO_OS == POCO_OS_ANDROID
#include <sys/inotify.h>
#include <sys/select.h>
#include <unistd.h>
#elif POCO_OS == POCO_OS_MAC_OS_X || POCO_OS == POCO_OS_FREE_BSD
#include <fcntl.h>
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <unistd.h>
#if (POCO_OS == POCO_OS_FREE_BSD) && !defined(O_EVTONLY)
#define O_EVTONLY 0x8000
#endif
#endif
#include <algorithm>
#include <atomic>
#include <map>
namespace Poco {
class DirectoryWatcherStrategy
{
public:
DirectoryWatcherStrategy(DirectoryWatcher& owner):
_owner(owner)
{
}
virtual ~DirectoryWatcherStrategy()
{
}
DirectoryWatcher& owner()
{
return _owner;
}
virtual void run() = 0;
virtual void stop() = 0;
virtual bool supportsMoveEvents() const = 0;
protected:
struct ItemInfo
{
ItemInfo():
size(0)
{
}
ItemInfo(const ItemInfo& other):
path(other.path),
size(other.size),
lastModified(other.lastModified)
{
}
explicit ItemInfo(const File& f):
path(f.path()),
size(f.isFile() ? f.getSize() : 0),
lastModified(f.getLastModified())
{
}
std::string path;
File::FileSize size;
Timestamp lastModified;
};
typedef std::map<std::string, ItemInfo> ItemInfoMap;
void scan(ItemInfoMap& entries)
{
DirectoryIterator it(owner().directory());
DirectoryIterator end;
while (it != end)
{
entries[it.path().getFileName()] = ItemInfo(*it);
++it;
}
}
void compare(ItemInfoMap& oldEntries, ItemInfoMap& newEntries)
{
for (ItemInfoMap::iterator itn = newEntries.begin(); itn != newEntries.end(); ++itn)
{
ItemInfoMap::iterator ito = oldEntries.find(itn->first);
if (ito != oldEntries.end())
{
if ((owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED) && !owner().eventsSuspended())
{
if (itn->second.size != ito->second.size || itn->second.lastModified != ito->second.lastModified)
{
Poco::File f(itn->second.path);
DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_MODIFIED);
owner().itemModified(&owner(), ev);
}
}
oldEntries.erase(ito);
}
else if ((owner().eventMask() & DirectoryWatcher::DW_ITEM_ADDED) && !owner().eventsSuspended())
{
Poco::File f(itn->second.path);
DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_ADDED);
owner().itemAdded(&owner(), ev);
}
}
if ((owner().eventMask() & DirectoryWatcher::DW_ITEM_REMOVED) && !owner().eventsSuspended())
{
for (ItemInfoMap::iterator it = oldEntries.begin(); it != oldEntries.end(); ++it)
{
Poco::File f(it->second.path);
DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_REMOVED);
owner().itemRemoved(&owner(), ev);
}
}
}
private:
DirectoryWatcherStrategy();
DirectoryWatcherStrategy(const DirectoryWatcherStrategy&);
DirectoryWatcherStrategy& operator = (const DirectoryWatcherStrategy&);
DirectoryWatcher& _owner;
};
#if POCO_OS == POCO_OS_WINDOWS_NT
class WindowsDirectoryWatcherStrategy: public DirectoryWatcherStrategy
{
public:
WindowsDirectoryWatcherStrategy(DirectoryWatcher& owner):
DirectoryWatcherStrategy(owner)
{
_hStopped = CreateEventW(NULL, FALSE, FALSE, NULL);
if (!_hStopped)
throw SystemException("cannot create event");
}
~WindowsDirectoryWatcherStrategy()
{
CloseHandle(_hStopped);
}
void run()
{
ItemInfoMap entries;
scan(entries);
DWORD filter = FILE_NOTIFY_CHANGE_FILE_NAME | FILE_NOTIFY_CHANGE_DIR_NAME;
if (owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED)
filter |= FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_LAST_WRITE;
std::string path(owner().directory().path());
HANDLE hChange = FindFirstChangeNotificationA(path.c_str(), FALSE, filter);
if (hChange == INVALID_HANDLE_VALUE)
{
try
{
FileImpl::handleLastErrorImpl(path);
}
catch (Poco::Exception& exc)
{
owner().scanError(&owner(), exc);
}
return;
}
bool stopped = false;
while (!stopped)
{
try
{
HANDLE h[2];
h[0] = _hStopped;
h[1] = hChange;
switch (WaitForMultipleObjects(2, h, FALSE, INFINITE))
{
case WAIT_OBJECT_0:
stopped = true;
break;
case WAIT_OBJECT_0 + 1:
{
ItemInfoMap newEntries;
scan(newEntries);
compare(entries, newEntries);
std::swap(entries, newEntries);
if (FindNextChangeNotification(hChange) == FALSE)
{
FileImpl::handleLastErrorImpl(path);
}
}
break;
default:
throw SystemException("failed to wait for directory changes");
}
}
catch (Poco::Exception& exc)
{
owner().scanError(&owner(), exc);
}
}
FindCloseChangeNotification(hChange);
}
void stop()
{
SetEvent(_hStopped);
}
bool supportsMoveEvents() const
{
return false;
}
private:
HANDLE _hStopped;
};
#elif POCO_OS == POCO_OS_LINUX || POCO_OS == POCO_OS_ANDROID
class LinuxDirectoryWatcherStrategy: public DirectoryWatcherStrategy
{
public:
LinuxDirectoryWatcherStrategy(DirectoryWatcher& owner):
DirectoryWatcherStrategy(owner),
_fd(-1),
_stopped(false)
{
_fd = inotify_init();
if (_fd == -1) throw Poco::IOException("cannot initialize inotify", errno);
}
~LinuxDirectoryWatcherStrategy()
{
close(_fd);
}
void run()
{
int mask = 0;
if (owner().eventMask() & DirectoryWatcher::DW_ITEM_ADDED)
mask |= IN_CREATE;
if (owner().eventMask() & DirectoryWatcher::DW_ITEM_REMOVED)
mask |= IN_DELETE;
if (owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED)
mask |= IN_MODIFY;
if (owner().eventMask() & DirectoryWatcher::DW_ITEM_MOVED_FROM)
mask |= IN_MOVED_FROM;
if (owner().eventMask() & DirectoryWatcher::DW_ITEM_MOVED_TO)
mask |= IN_MOVED_TO;
int wd = inotify_add_watch(_fd, owner().directory().path().c_str(), mask);
if (wd == -1)
{
try
{
FileImpl::handleLastErrorImpl(owner().directory().path());
}
catch (Poco::Exception& exc)
{
owner().scanError(&owner(), exc);
}
}
Poco::Buffer<char> buffer(4096);
while (!_stopped.load(std::memory_order_relaxed))
{
fd_set fds;
FD_ZERO(&fds);
FD_SET(_fd, &fds);
struct timeval tv;
tv.tv_sec = 0;
tv.tv_usec = 200000;
if (select(_fd + 1, &fds, NULL, NULL, &tv) == 1)
{
int n = read(_fd, buffer.begin(), buffer.size());
int i = 0;
if (n > 0)
{
while (n > 0)
{
struct inotify_event* event = reinterpret_cast<struct inotify_event*>(buffer.begin() + i);
if (event->len > 0)
{
if (!owner().eventsSuspended())
{
Poco::Path p(owner().directory().path());
p.makeDirectory();
p.setFileName(event->name);
Poco::File f(p.toString());
if ((event->mask & IN_CREATE) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_ADDED))
{
DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_ADDED);
owner().itemAdded(&owner(), ev);
}
if ((event->mask & IN_DELETE) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_REMOVED))
{
DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_REMOVED);
owner().itemRemoved(&owner(), ev);
}
if ((event->mask & IN_MODIFY) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED))
{
DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_MODIFIED);
owner().itemModified(&owner(), ev);
}
if ((event->mask & IN_MOVED_FROM) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_MOVED_FROM))
{
DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_MOVED_FROM);
owner().itemMovedFrom(&owner(), ev);
}
if ((event->mask & IN_MOVED_TO) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_MOVED_TO))
{
DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_MOVED_TO);
owner().itemMovedTo(&owner(), ev);
}
}
}
i += sizeof(inotify_event) + event->len;
n -= sizeof(inotify_event) + event->len;
}
}
}
}
}
void stop()
{
_stopped.store(true, std::memory_order_relaxed);
}
bool supportsMoveEvents() const
{
return true;
}
private:
int _fd;
std::atomic<bool> _stopped;
};
#elif POCO_OS == POCO_OS_MAC_OS_X || POCO_OS == POCO_OS_FREE_BSD
class BSDDirectoryWatcherStrategy: public DirectoryWatcherStrategy
{
public:
BSDDirectoryWatcherStrategy(DirectoryWatcher& owner):
DirectoryWatcherStrategy(owner),
_queueFD(-1),
_dirFD(-1),
_stopped(false)
{
_dirFD = open(owner.directory().path().c_str(), O_EVTONLY);
if (_dirFD < 0) throw Poco::FileNotFoundException(owner.directory().path());
_queueFD = kqueue();
if (_queueFD < 0)
{
close(_dirFD);
throw Poco::SystemException("Cannot create kqueue", errno);
}
}
~BSDDirectoryWatcherStrategy()
{
close(_dirFD);
close(_queueFD);
}
void run()
{
Poco::Timestamp lastScan;
ItemInfoMap entries;
scan(entries);
while (!_stopped.load(std::memory_order_relaxed))
{
struct timespec timeout;
timeout.tv_sec = 0;
timeout.tv_nsec = 200000000;
unsigned eventFilter = NOTE_WRITE;
struct kevent event;
struct kevent eventData;
EV_SET(&event, _dirFD, EVFILT_VNODE, EV_ADD | EV_CLEAR, eventFilter, 0, 0);
int nEvents = kevent(_queueFD, &event, 1, &eventData, 1, &timeout);
if (nEvents < 0 || eventData.flags == EV_ERROR)
{
try
{
FileImpl::handleLastErrorImpl(owner().directory().path());
}
catch (Poco::Exception& exc)
{
owner().scanError(&owner(), exc);
}
}
else if (nEvents > 0 || ((owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED) && lastScan.isElapsed(owner().scanInterval()*1000000)))
{
ItemInfoMap newEntries;
scan(newEntries);
compare(entries, newEntries);
std::swap(entries, newEntries);
lastScan.update();
}
}
}
void stop()
{
_stopped.store(true, std::memory_order_relaxed);
}
bool supportsMoveEvents() const
{
return false;
}
private:
int _queueFD;
int _dirFD;
std::atomic<bool> _stopped;
};
#else
class PollingDirectoryWatcherStrategy: public DirectoryWatcherStrategy
{
public:
PollingDirectoryWatcherStrategy(DirectoryWatcher& owner):
DirectoryWatcherStrategy(owner)
{
}
~PollingDirectoryWatcherStrategy()
{
}
void run()
{
ItemInfoMap entries;
scan(entries);
while (!_stopped.tryWait(1000*owner().scanInterval()))
{
try
{
ItemInfoMap newEntries;
scan(newEntries);
compare(entries, newEntries);
std::swap(entries, newEntries);
}
catch (Poco::Exception& exc)
{
owner().scanError(&owner(), exc);
}
}
}
void stop()
{
_stopped.set();
}
bool supportsMoveEvents() const
{
return false;
}
private:
Poco::Event _stopped;
};
#endif
DirectoryWatcher::DirectoryWatcher(const std::string& path, int eventMask, int scanInterval):
_directory(path),
_eventMask(eventMask),
_scanInterval(scanInterval)
{
init();
}
DirectoryWatcher::DirectoryWatcher(const Poco::File& directory, int eventMask, int scanInterval):
_directory(directory),
_eventMask(eventMask),
_scanInterval(scanInterval)
{
init();
}
DirectoryWatcher::~DirectoryWatcher()
{
try
{
stop();
delete _pStrategy;
}
catch (...)
{
poco_unexpected();
}
}
void DirectoryWatcher::suspendEvents()
{
_eventsSuspended++;
}
void DirectoryWatcher::resumeEvents()
{
poco_assert (_eventsSuspended > 0);
_eventsSuspended--;
}
void DirectoryWatcher::init()
{
if (!_directory.exists())
throw Poco::FileNotFoundException(_directory.path());
if (!_directory.isDirectory())
throw Poco::InvalidArgumentException("not a directory", _directory.path());
#if POCO_OS == POCO_OS_WINDOWS_NT
_pStrategy = new WindowsDirectoryWatcherStrategy(*this);
#elif POCO_OS == POCO_OS_LINUX || POCO_OS == POCO_OS_ANDROID
_pStrategy = new LinuxDirectoryWatcherStrategy(*this);
#elif POCO_OS == POCO_OS_MAC_OS_X || POCO_OS == POCO_OS_FREE_BSD
_pStrategy = new BSDDirectoryWatcherStrategy(*this);
#else
_pStrategy = new PollingDirectoryWatcherStrategy(*this);
#endif
_thread.start(*this);
}
void DirectoryWatcher::run()
{
_pStrategy->run();
}
void DirectoryWatcher::stop()
{
_pStrategy->stop();
_thread.join();
}
bool DirectoryWatcher::supportsMoveEvents() const
{
return _pStrategy->supportsMoveEvents();
}
} // namespace Poco
#endif // POCO_NO_INOTIFY


@ -42,10 +42,8 @@ if (CMAKE_CROSSCOMPILING)
if (ARCH_AARCH64)
# FIXME: broken dependencies
set (ENABLE_GRPC OFF CACHE INTERNAL "")
set (ENABLE_SENTRY OFF CACHE INTERNAL "")
elseif (ARCH_PPC64LE)
set (ENABLE_GRPC OFF CACHE INTERNAL "")
set (ENABLE_SENTRY OFF CACHE INTERNAL "")
elseif (ARCH_RISCV64)
# RISC-V support is preliminary
set (GLIBC_COMPATIBILITY OFF CACHE INTERNAL "")
@ -73,19 +71,5 @@ if (CMAKE_CROSSCOMPILING)
message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!")
endif ()
if (USE_MUSL)
# use of undeclared identifier 'PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP'
set (ENABLE_SENTRY OFF CACHE INTERNAL "")
set (ENABLE_ODBC OFF CACHE INTERNAL "")
set (ENABLE_GRPC OFF CACHE INTERNAL "")
set (ENABLE_HDFS OFF CACHE INTERNAL "")
set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "")
# use of drand48_data
set (ENABLE_AZURE_BLOB_STORAGE OFF CACHE INTERNAL "")
endif ()
# Don't know why but CXX_STANDARD doesn't work for cross-compilation
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++20")
message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}")
endif ()


@ -134,9 +134,9 @@ add_contrib (libuv-cmake libuv)
add_contrib (liburing-cmake liburing)
add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv
add_contrib (cassandra-cmake cassandra) # requires: libuv
add_contrib (curl-cmake curl)
add_contrib (azure-cmake azure) # requires: curl
if (NOT OS_DARWIN)
add_contrib (curl-cmake curl)
add_contrib (azure-cmake azure) # requires: curl
add_contrib (sentry-native-cmake sentry-native) # requires: curl
endif()
add_contrib (fmtlib-cmake fmtlib)

contrib/azure vendored

@ -1 +1 @@
Subproject commit 096049bf24fffafcaccc132b9367694532716731
Subproject commit 352ff0a61cb319ac1cc38c4058443ddf70147530


@ -10,7 +10,7 @@ set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/curl")
set (SRCS
"${LIBRARY_DIR}/lib/altsvc.c"
"${LIBRARY_DIR}/lib/amigaos.c"
"${LIBRARY_DIR}/lib/asyn-thread.c"
"${LIBRARY_DIR}/lib/asyn-ares.c"
"${LIBRARY_DIR}/lib/base64.c"
"${LIBRARY_DIR}/lib/bufq.c"
"${LIBRARY_DIR}/lib/bufref.c"
@ -165,13 +165,14 @@ target_compile_definitions (_curl PRIVATE
libcurl_EXPORTS
OS="${CMAKE_SYSTEM_NAME}"
)
target_include_directories (_curl SYSTEM PUBLIC
"${LIBRARY_DIR}/include"
"${LIBRARY_DIR}/lib"
. # curl_config.h
)
target_link_libraries (_curl PRIVATE OpenSSL::SSL)
target_link_libraries (_curl PRIVATE OpenSSL::SSL ch_contrib::c-ares)
# The library is large - avoid bloat (XXX: is it?)
if (OMIT_HEAVY_DEBUG_SYMBOLS)


@ -50,3 +50,4 @@
#define ENABLE_IPV6
#define USE_OPENSSL
#define USE_THREADS_POSIX
#define USE_ARES

contrib/libhdfs3 vendored

@ -1 +1 @@
Subproject commit bdcb91354b1c05b21e73043a112a6f1e3b013497
Subproject commit b9598e6016720a7c088bfe85ce1fa0410f9d2103


@ -26,6 +26,11 @@ ADD_DEFINITIONS(-D__STDC_FORMAT_MACROS)
ADD_DEFINITIONS(-D_GNU_SOURCE)
ADD_DEFINITIONS(-D_GLIBCXX_USE_NANOSLEEP)
ADD_DEFINITIONS(-DHAVE_NANOSLEEP)
if (USE_MUSL)
ADD_DEFINITIONS(-DSTRERROR_R_RETURN_INT)
endif ()
set(HAVE_STEADY_CLOCK 1)
set(HAVE_NESTED_EXCEPTION 1)
SET(HAVE_BOOST_CHRONO 0)

contrib/librdkafka vendored

@ -1 +1 @@
Subproject commit 6f3b483426a8c8ec950e27e446bec175cf8b553f
Subproject commit 2d2aab6f5b79db1cfca15d7bf0dee75d00d82082


@ -270,7 +270,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
*
* Whether iconv support is available
*/
#if 1
#if 0
#define LIBXML_ICONV_ENABLED
#endif
@ -499,5 +499,3 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
}
#endif /* __cplusplus */
#endif

@ -1 +1 @@
Subproject commit e7b8befca85c8b847614432dba250c22d35fbae0
Subproject commit 1834e42289c58402c804a87be4d489892b88f3ec


@ -117,7 +117,7 @@ endif()
add_definitions(-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX)
if (OS_LINUX OR OS_FREEBSD)
if ((OS_LINUX OR OS_FREEBSD) AND NOT USE_MUSL)
add_definitions(-DROCKSDB_PTHREAD_ADAPTIVE_MUTEX)
endif()

@ -1 +1 @@
Subproject commit ae10fb8c224c3f41571446e1ed7fd57b9e5e366b
Subproject commit bc359f86cbf0f73f6fd4b6bfb4ede0c1f8c9400f


@ -13,6 +13,7 @@ set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/sentry-native")
set (SRCS
${SRC_DIR}/vendor/mpack.c
${SRC_DIR}/vendor/stb_sprintf.c
${SRC_DIR}/src/sentry_alloc.c
${SRC_DIR}/src/sentry_backend.c
${SRC_DIR}/src/sentry_core.c
@ -21,6 +22,7 @@ set (SRCS
${SRC_DIR}/src/sentry_json.c
${SRC_DIR}/src/sentry_logger.c
${SRC_DIR}/src/sentry_options.c
${SRC_DIR}/src/sentry_os.c
${SRC_DIR}/src/sentry_random.c
${SRC_DIR}/src/sentry_ratelimiter.c
${SRC_DIR}/src/sentry_scope.c
@ -29,6 +31,7 @@ set (SRCS
${SRC_DIR}/src/sentry_string.c
${SRC_DIR}/src/sentry_sync.c
${SRC_DIR}/src/sentry_transport.c
${SRC_DIR}/src/sentry_tracing.c
${SRC_DIR}/src/sentry_utils.c
${SRC_DIR}/src/sentry_uuid.c
${SRC_DIR}/src/sentry_value.c


@ -1,7 +1,7 @@
option (ENABLE_ODBC "Enable ODBC library" ${ENABLE_LIBRARIES})
if (NOT OS_LINUX)
if (NOT OS_LINUX OR USE_MUSL)
if (ENABLE_ODBC)
message(STATUS "ODBC is only supported on Linux")
message(STATUS "ODBC is only supported on Linux with dynamic linking")
endif()
set (ENABLE_ODBC OFF CACHE INTERNAL "")
endif ()


@ -125,6 +125,7 @@
"docker/test/server-jepsen",
"docker/test/sqllogic",
"docker/test/sqltest",
"docker/test/clickbench",
"docker/test/stateless"
]
},
@ -145,6 +146,10 @@
"name": "clickhouse/server-jepsen-test",
"dependent": []
},
"docker/test/clickbench": {
"name": "clickhouse/clickbench",
"dependent": []
},
"docker/test/install/deb": {
"name": "clickhouse/install-deb-test",
"dependent": []


@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.11.1.2711"
ARG VERSION="23.11.2.11"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.


@ -145,6 +145,7 @@ def parse_env_variables(
RISCV_SUFFIX = "-riscv64"
S390X_SUFFIX = "-s390x"
AMD64_COMPAT_SUFFIX = "-amd64-compat"
AMD64_MUSL_SUFFIX = "-amd64-musl"
result = []
result.append("OUTPUT_DIR=/output")
@ -163,6 +164,7 @@ def parse_env_variables(
is_cross_s390x = compiler.endswith(S390X_SUFFIX)
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX)
is_amd64_musl = compiler.endswith(AMD64_MUSL_SUFFIX)
if is_cross_darwin:
cc = compiler[: -len(DARWIN_SUFFIX)]
@ -232,6 +234,12 @@ def parse_env_variables(
cc = compiler[: -len(AMD64_COMPAT_SUFFIX)]
result.append("DEB_ARCH=amd64")
cmake_flags.append("-DNO_SSE3_OR_HIGHER=1")
elif is_amd64_musl:
cc = compiler[: -len(AMD64_MUSL_SUFFIX)]
result.append("DEB_ARCH=amd64")
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-x86_64-musl.cmake"
)
else:
cc = compiler
result.append("DEB_ARCH=amd64")
@ -396,6 +404,7 @@ def parse_args() -> argparse.Namespace:
"clang-17-riscv64",
"clang-17-s390x",
"clang-17-amd64-compat",
"clang-17-amd64-musl",
"clang-17-freebsd",
),
default="clang-17",


@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.11.1.2711"
ARG VERSION="23.11.2.11"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.


@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.11.1.2711"
ARG VERSION="23.11.2.11"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image


@ -12,6 +12,7 @@ RUN apt-get update \
ripgrep \
zstd \
locales \
sudo \
--yes --no-install-recommends
# Sanitizer options for services (clickhouse-server)


@ -21,7 +21,7 @@ EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "}
# trace_log needs more columns for symbolization
EXTRA_COLUMNS_TRACE_LOG="${EXTRA_COLUMNS} symbols Array(LowCardinality(String)), lines Array(LowCardinality(String)), "
EXTRA_COLUMNS_EXPRESSION_TRACE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x -> toLowCardinality(demangle(addressToSymbol(x))), trace) AS symbols, arrayMap(x -> toLowCardinality(addressToLine(x)), trace) AS lines"
EXTRA_COLUMNS_EXPRESSION_TRACE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x -> demangle(addressToSymbol(x)), trace)::Array(LowCardinality(String)) AS symbols, arrayMap(x -> addressToLine(x), trace)::Array(LowCardinality(String)) AS lines"
function __set_connection_args


@ -0,0 +1,10 @@
ARG FROM_TAG=latest
FROM clickhouse/test-base:$FROM_TAG
ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
COPY *.sh /
COPY *.sql /
CMD ["/bin/bash", "/run.sh"]


@ -0,0 +1,112 @@
ATTACH TABLE hits UUID 'c449dfbf-ba06-4d13-abec-8396559eb955'
(
WatchID BIGINT NOT NULL,
JavaEnable SMALLINT NOT NULL,
Title TEXT NOT NULL,
GoodEvent SMALLINT NOT NULL,
EventTime TIMESTAMP NOT NULL,
EventDate Date NOT NULL,
CounterID INTEGER NOT NULL,
ClientIP INTEGER NOT NULL,
RegionID INTEGER NOT NULL,
UserID BIGINT NOT NULL,
CounterClass SMALLINT NOT NULL,
OS SMALLINT NOT NULL,
UserAgent SMALLINT NOT NULL,
URL TEXT NOT NULL,
Referer TEXT NOT NULL,
IsRefresh SMALLINT NOT NULL,
RefererCategoryID SMALLINT NOT NULL,
RefererRegionID INTEGER NOT NULL,
URLCategoryID SMALLINT NOT NULL,
URLRegionID INTEGER NOT NULL,
ResolutionWidth SMALLINT NOT NULL,
ResolutionHeight SMALLINT NOT NULL,
ResolutionDepth SMALLINT NOT NULL,
FlashMajor SMALLINT NOT NULL,
FlashMinor SMALLINT NOT NULL,
FlashMinor2 TEXT NOT NULL,
NetMajor SMALLINT NOT NULL,
NetMinor SMALLINT NOT NULL,
UserAgentMajor SMALLINT NOT NULL,
UserAgentMinor VARCHAR(255) NOT NULL,
CookieEnable SMALLINT NOT NULL,
JavascriptEnable SMALLINT NOT NULL,
IsMobile SMALLINT NOT NULL,
MobilePhone SMALLINT NOT NULL,
MobilePhoneModel TEXT NOT NULL,
Params TEXT NOT NULL,
IPNetworkID INTEGER NOT NULL,
TraficSourceID SMALLINT NOT NULL,
SearchEngineID SMALLINT NOT NULL,
SearchPhrase TEXT NOT NULL,
AdvEngineID SMALLINT NOT NULL,
IsArtifical SMALLINT NOT NULL,
WindowClientWidth SMALLINT NOT NULL,
WindowClientHeight SMALLINT NOT NULL,
ClientTimeZone SMALLINT NOT NULL,
ClientEventTime TIMESTAMP NOT NULL,
SilverlightVersion1 SMALLINT NOT NULL,
SilverlightVersion2 SMALLINT NOT NULL,
SilverlightVersion3 INTEGER NOT NULL,
SilverlightVersion4 SMALLINT NOT NULL,
PageCharset TEXT NOT NULL,
CodeVersion INTEGER NOT NULL,
IsLink SMALLINT NOT NULL,
IsDownload SMALLINT NOT NULL,
IsNotBounce SMALLINT NOT NULL,
FUniqID BIGINT NOT NULL,
OriginalURL TEXT NOT NULL,
HID INTEGER NOT NULL,
IsOldCounter SMALLINT NOT NULL,
IsEvent SMALLINT NOT NULL,
IsParameter SMALLINT NOT NULL,
DontCountHits SMALLINT NOT NULL,
WithHash SMALLINT NOT NULL,
HitColor CHAR NOT NULL,
LocalEventTime TIMESTAMP NOT NULL,
Age SMALLINT NOT NULL,
Sex SMALLINT NOT NULL,
Income SMALLINT NOT NULL,
Interests SMALLINT NOT NULL,
Robotness SMALLINT NOT NULL,
RemoteIP INTEGER NOT NULL,
WindowName INTEGER NOT NULL,
OpenerName INTEGER NOT NULL,
HistoryLength SMALLINT NOT NULL,
BrowserLanguage TEXT NOT NULL,
BrowserCountry TEXT NOT NULL,
SocialNetwork TEXT NOT NULL,
SocialAction TEXT NOT NULL,
HTTPError SMALLINT NOT NULL,
SendTiming INTEGER NOT NULL,
DNSTiming INTEGER NOT NULL,
ConnectTiming INTEGER NOT NULL,
ResponseStartTiming INTEGER NOT NULL,
ResponseEndTiming INTEGER NOT NULL,
FetchTiming INTEGER NOT NULL,
SocialSourceNetworkID SMALLINT NOT NULL,
SocialSourcePage TEXT NOT NULL,
ParamPrice BIGINT NOT NULL,
ParamOrderID TEXT NOT NULL,
ParamCurrency TEXT NOT NULL,
ParamCurrencyID SMALLINT NOT NULL,
OpenstatServiceName TEXT NOT NULL,
OpenstatCampaignID TEXT NOT NULL,
OpenstatAdID TEXT NOT NULL,
OpenstatSourceID TEXT NOT NULL,
UTMSource TEXT NOT NULL,
UTMMedium TEXT NOT NULL,
UTMCampaign TEXT NOT NULL,
UTMContent TEXT NOT NULL,
UTMTerm TEXT NOT NULL,
FromTag TEXT NOT NULL,
HasGCLID SMALLINT NOT NULL,
RefererHash BIGINT NOT NULL,
URLHash BIGINT NOT NULL,
CLID INTEGER NOT NULL,
PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID)
)
ENGINE = MergeTree
SETTINGS disk = disk(type = cache, path = '/dev/shm/clickhouse/', max_size = '16G',
disk = disk(type = web, endpoint = 'https://clickhouse-datasets-web.s3.us-east-1.amazonaws.com/'));


@ -0,0 +1,43 @@
SELECT COUNT(*) FROM hits;
SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0;
SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;
SELECT AVG(UserID) FROM hits;
SELECT COUNT(DISTINCT UserID) FROM hits;
SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
SELECT MIN(EventDate), MAX(EventDate) FROM hits;
SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
SELECT UserID FROM hits WHERE UserID = 435090932899640449;
SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits;
SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000;

docker/test/clickbench/run.sh Executable file

@ -0,0 +1,79 @@
#!/bin/bash
SCRIPT_PID=$!
(sleep 1200 && kill -9 $SCRIPT_PID) &
# shellcheck disable=SC1091
source /setup_export_logs.sh
# fail on errors, verbose and export all env variables
set -e -x -a
dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
# A directory for cache
mkdir /dev/shm/clickhouse
chown clickhouse:clickhouse /dev/shm/clickhouse
# Allow introspection functions, needed for sending the logs
echo "
profiles:
default:
allow_introspection_functions: 1
" > /etc/clickhouse-server/users.d/allow_introspection_functions.yaml
# Enable text_log
echo "
text_log:
" > /etc/clickhouse-server/config.d/text_log.yaml
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
clickhouse start
# Wait for the server to start, but not for too long.
for _ in {1..100}
do
clickhouse-client --query "SELECT 1" && break
sleep 1
done
setup_logs_replication
# Load the data
clickhouse-client --time < /create.sql
# Run the queries
set +x
TRIES=3
QUERY_NUM=1
while read -r query; do
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$(clickhouse-client --time --format Null --query "$query" --progress 0 2>&1 ||:)
echo -n "${RES}"
[[ "$i" != "$TRIES" ]] && echo -n ", "
echo "${QUERY_NUM},${i},${RES}" >> /test_output/test_results.tsv
done
echo "],"
QUERY_NUM=$((QUERY_NUM + 1))
done < /queries.sql
set -x
clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'hits' AND database = 'default'"
clickhouse-client -q "system flush logs" ||:
stop_logs_replication
clickhouse stop
mv /var/log/clickhouse-server/* /test_output/
echo -e "success\tClickBench finished" > /test_output/check_status.tsv


@ -16,7 +16,7 @@ export LLVM_VERSION=${LLVM_VERSION:-17}
# it being undefined. Also read it as array so that we can pass an empty list
# of additional variable to cmake properly, and it doesn't generate an extra
# empty parameter.
# Read it as CMAKE_FLAGS to not lose exported FASTTEST_CMAKE_FLAGS on subsequential launch
# Read it as CMAKE_FLAGS to not lose exported FASTTEST_CMAKE_FLAGS on subsequent launch
read -ra CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}"
# Run only matching tests.
@ -197,7 +197,7 @@ function run_cmake
(
cd "$FASTTEST_BUILD"
cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER="clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="clang-${LLVM_VERSION}" "${CMAKE_LIBS_CONFIG[@]}" "${CMAKE_FLAGS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt"
cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER="clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="clang-${LLVM_VERSION}" -DCMAKE_TOOLCHAIN_FILE="${FASTTEST_SOURCE}/cmake/linux/toolchain-x86_64-musl.cmake" "${CMAKE_LIBS_CONFIG[@]}" "${CMAKE_FLAGS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt"
)
}


@ -24,6 +24,22 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
cache_policy=""
if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then
cache_policy="SLRU"
else
cache_policy="LRU"
fi
echo "Using cache policy: $cache_policy"
if [ "$cache_policy" = "SLRU" ]; then
sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
| sed "s|<cache_policy>LRU</cache_policy>|<cache_policy>SLRU</cache_policy>|" \
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
fi
function start()
{
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
@ -135,7 +151,7 @@ function run_tests()
set +e
if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then
clickhouse-test --client="clickhouse-client --use_hedged_requests=0 --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \
clickhouse-test --client="clickhouse-client --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \
--max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \
-j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt


@ -30,7 +30,7 @@ def build_url(base_url, dataset):
return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset])
def dowload_with_progress(url, path):
def download_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path)
for i in range(RETRIES_COUNT):
try:
@ -110,7 +110,7 @@ if __name__ == "__main__":
temp_archive_path = _get_temp_file_name()
try:
download_url_for_dataset = build_url(args.url_prefix, dataset)
dowload_with_progress(download_url_for_dataset, temp_archive_path)
download_with_progress(download_url_for_dataset, temp_archive_path)
unpack_to_clickhouse_directory(temp_archive_path, args.clickhouse_data_path)
except Exception as ex:
logging.info("Some exception occured %s", str(ex))


@ -65,9 +65,27 @@ chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
# Randomize cache policies.
cache_policy=""
if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then
cache_policy="SLRU"
else
cache_policy="LRU"
fi
echo "Using cache policy: $cache_policy"
if [ "$cache_policy" = "SLRU" ]; then
sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
| sed "s|<cache_policy>LRU</cache_policy>|<cache_policy>SLRU</cache_policy>|" \
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
fi
start
clickhouse-client --query "SHOW TABLES FROM datasets"
@ -191,6 +209,13 @@ sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \
> /etc/clickhouse-server/config.d/logger_trace.xml.tmp
mv /etc/clickhouse-server/config.d/logger_trace.xml.tmp /etc/clickhouse-server/config.d/logger_trace.xml
if [ "$cache_policy" = "SLRU" ]; then
sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
| sed "s|<cache_policy>LRU</cache_policy>|<cache_policy>SLRU</cache_policy>|" \
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
fi
# Randomize async_load_databases
if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then
sudo echo "<clickhouse><async_load_databases>true</async_load_databases></clickhouse>" \


@ -0,0 +1,22 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.11.2.11-stable (6e5411358c8) FIXME as compared to v23.11.1.2711-stable (05bc8ef1e02)
#### Improvement
* Backported in [#57661](https://github.com/ClickHouse/ClickHouse/issues/57661): Handle sigabrt case when getting PostgreSQl table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot (Михаил Кот)](https://github.com/myrrc)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities. [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Fix SIGSEGV for aggregation of sparse columns with any() RESPECT NULL [#57710](https://github.com/ClickHouse/ClickHouse/pull/57710) ([Azat Khuzhin](https://github.com/azat)).
* Fix bug window functions: revert [#39631](https://github.com/ClickHouse/ClickHouse/issues/39631) [#57766](https://github.com/ClickHouse/ClickHouse/pull/57766) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Pin alpine version of integration tests helper container [#57669](https://github.com/ClickHouse/ClickHouse/pull/57669) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).


@ -28,18 +28,20 @@ sudo apt-get install clang-17
Lets remember the path where we install `cctools` as ${CCTOOLS}
``` bash
mkdir ~/cctools
export CCTOOLS=$(cd ~/cctools && pwd)
mkdir ${CCTOOLS}
cd ${CCTOOLS}
git clone --depth=1 https://github.com/tpoechtrager/apple-libtapi.git
git clone https://github.com/tpoechtrager/apple-libtapi.git
cd apple-libtapi
git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9
INSTALLPREFIX=${CCTOOLS} ./build.sh
./install.sh
cd ..
git clone --depth=1 https://github.com/tpoechtrager/cctools-port.git
git clone https://github.com/tpoechtrager/cctools-port.git
cd cctools-port/cctools
git checkout 2a3e1c2a6ff54a30f898b70cfb9ba1692a55fad7
./configure --prefix=$(readlink -f ${CCTOOLS}) --with-libtapi=$(readlink -f ${CCTOOLS}) --target=x86_64-apple-darwin
make install
```


@ -3,7 +3,7 @@ slug: /en/development/build-osx
sidebar_position: 65
sidebar_label: Build on macOS
title: How to Build ClickHouse on macOS
description: How to build ClickHouse on macOS
description: How to build ClickHouse on macOS for macOS
---
:::info You don't have to build ClickHouse yourself!


@ -7,42 +7,39 @@ description: Prerequisites and an overview of how to build ClickHouse
# Getting Started Guide for Building ClickHouse
The building of ClickHouse is supported on Linux, FreeBSD and macOS.
ClickHouse can be build on Linux, FreeBSD and macOS. If you use Windows, you can still build ClickHouse in a virtual machine running Linux, e.g. [VirtualBox](https://www.virtualbox.org/) with Ubuntu.
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading.
ClickHouse requires a 64-bit system to compile and run, 32-bit systems do not work.
## Creating a Repository on GitHub {#creating-a-repository-on-github}
To start working with ClickHouse repository you will need a GitHub account.
To start developing for ClickHouse you will need a [GitHub](https://www.virtualbox.org/) account. Please also generate a SSH key locally (if you don't have one already) and upload the public key to GitHub as this is a prerequisite for contributing patches.
You probably already have one, but if you do not, please register at https://github.com. In case you do not have SSH keys, you should generate them and then upload them on GitHub. It is required for sending over your patches. It is also possible to use the same SSH keys that you use with any other SSH servers - probably you already have those.
Next, create a fork of the [ClickHouse repository](https://github.com/ClickHouse/ClickHouse/) in your personal account by clicking the "fork" button in the upper right corner.
Create a fork of ClickHouse repository. To do that please click on the “fork” button in the upper right corner at https://github.com/ClickHouse/ClickHouse. It will fork your own copy of ClickHouse/ClickHouse to your account.
To contribute, e.g. a fix for an issue or a feature, please commit your changes to a branch in your fork, then create a "pull request" with the changes to the main repository.
The development process consists of first committing the intended changes into your fork of ClickHouse and then creating a “pull request” for these changes to be accepted into the main repository (ClickHouse/ClickHouse).
For working with Git repositories, please install `git`. In Ubuntu run these commands in a terminal:
To work with Git repositories, please install `git`. To do that in Ubuntu you would run in the command line terminal:
```sh
sudo apt update
sudo apt install git
```
sudo apt update
sudo apt install git
A brief manual on using Git can be found [here](https://education.github.com/git-cheat-sheet-education.pdf).
For a detailed manual on Git see [here](https://git-scm.com/book/en/v2).
A cheatsheet for using Git can be found [here](https://education.github.com/git-cheat-sheet-education.pdf). The detailed manual for Git is [here](https://git-scm.com/book/en/v2).
## Cloning a Repository to Your Development Machine {#cloning-a-repository-to-your-development-machine}
Next, you need to download the source files onto your working machine. This is called “to clone a repository” because it creates a local copy of the repository on your working machine.
First, download the source files to your working machine, i.e. clone the repository:
Run in your terminal:
```sh
git clone git@github.com:your_github_username/ClickHouse.git # replace placeholder with your GitHub user name
cd ClickHouse
```
git clone git@github.com:your_github_username/ClickHouse.git # replace placeholder with your GitHub user name
cd ClickHouse
This command creates a directory `ClickHouse/` containing the source code of ClickHouse. If you specify a custom checkout directory after the URL but it is important that this path does not contain whitespaces as it may lead to problems with the build later on.
This command will create a directory `ClickHouse/` containing the source code of ClickHouse. If you specify a custom checkout directory (after the URL), it is important that this path does not contain whitespaces as it may lead to problems with the build system.
To make library dependencies available for the build, the ClickHouse repository uses Git submodules, i.e. references to external repositories. These are not checked out by default. To do so, you can either
The ClickHouse repository uses Git submodules, i.e. references to external repositories (usually 3rd party libraries used by ClickHouse). These are not checked out by default. To do so, you can either
- run `git clone` with option `--recurse-submodules`,
@ -52,7 +49,7 @@ To make library dependencies available for the build, the ClickHouse repository
You can check the Git status with the command: `git submodule status`.
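If the repository was cloned without submodules, a short sketch of fetching them afterwards:

```sh
# Fetch and check out all submodules referenced by the repository.
git submodule update --init --recursive
# Verify that the submodules are checked out.
git submodule status
```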
If you get the following error message:
If you get the following error message
Permission denied (publickey).
fatal: Could not read from remote repository.
@ -60,7 +57,7 @@ If you get the following error message:
Please make sure you have the correct access rights
and the repository exists.
It generally means that the SSH keys for connecting to GitHub are missing. These keys are normally located in `~/.ssh`. For SSH keys to be accepted you need to upload them in the settings section of GitHub UI.
it generally means that the SSH keys for connecting to GitHub are missing. These keys are normally located in `~/.ssh`. For SSH keys to be accepted you need to upload them in GitHub's settings.
You can also clone the repository via https protocol:
@ -74,12 +71,17 @@ You can also add original ClickHouse repo address to your local repository to pu
After successfully running this command you will be able to pull updates from the main ClickHouse repo by running `git pull upstream master`.
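For illustration, a minimal sketch of setting up such a remote (the name `upstream` is a common convention, not a requirement):

```sh
# Register the main repository as an additional remote.
git remote add upstream git@github.com:ClickHouse/ClickHouse.git
# Later, pull the latest changes from the main repository into your local branch.
git pull upstream master
```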
:::note
Instructions below assume you are building on Linux. If you are cross-compiling or building on macOS, please also check for operating system and architecture specific guides, such as building [on macOS for macOS](build-osx.md), [on Linux for macOS](build-cross-osx.md), [on Linux for Linux/RISC-V](build-cross-riscv.md) and so on.
:::
## Build System {#build-system}
ClickHouse uses CMake and Ninja for building.
CMake - a meta-build system that can generate Ninja files (build tasks).
Ninja - a smaller build system with a focus on the speed used to execute those cmake generated tasks.
- CMake - a meta-build system that can generate Ninja files (build tasks).
- Ninja - a smaller build system with a focus on speed, used to execute those CMake-generated tasks.
To install on Ubuntu, Debian or Mint run `sudo apt install cmake ninja-build`.
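To sketch how the two typically fit together in an out-of-source build (assuming compilers and other prerequisites are already installed; the `build` directory name and the `clickhouse` target are illustrative):

```sh
mkdir build && cd build
# CMake generates the Ninja build files...
cmake -G Ninja ..
# ...and Ninja executes them.
ninja clickhouse
```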

View File

@ -1,13 +1,16 @@
---
slug: /en/engines/table-engines/special/distributed
sidebar_label: "Distributed"
sidebar_position: 10
sidebar_label: Distributed
slug: /en/engines/table-engines/special/distributed
---
# Distributed Table Engine
Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers.
Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any.
:::warning
To create a distributed table engine in the cloud, you can use the [remote and remoteSecure](../../../sql-reference/table-functions/remote) table functions. The `Distributed(...)` syntax cannot be used in ClickHouse Cloud.
:::
Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers. Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any.
## Creating a Table {#distributed-creating-a-table}
@ -22,6 +25,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
```
### From a Table {#distributed-from-a-table}
When the `Distributed` table is pointing to a table on the current server you can adopt that table's schema:
``` sql
@ -48,7 +52,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2
Specifying the `sharding_key` is necessary for the following:
- For `INSERTs` into a distributed table (as the table engine needs the `sharding_key` to determine how to split the data). However, if `insert_distributed_one_random_shard` setting is enabled, then `INSERTs` do not need the sharding key
- For `INSERTs` into a distributed table (as the table engine needs the `sharding_key` to determine how to split the data). However, if `insert_distributed_one_random_shard` setting is enabled, then `INSERTs` do not need the sharding key.
- For use with `optimize_skip_unused_shards` as the `sharding_key` is necessary to determine what shards should be queried
#### policy_name
@ -122,9 +126,7 @@ SETTINGS
fsync_directories=0;
```
Data will be read from all servers in the `logs` cluster, from the `default.hits` table located on every server in the cluster.
Data is not only read but is partially processed on the remote servers (to the extent that this is possible).
For example, for a query with `GROUP BY`, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated.
Data will be read from all servers in the `logs` cluster, from the `default.hits` table located on every server in the cluster. Data is not only read but is partially processed on the remote servers (to the extent that this is possible). For example, for a query with `GROUP BY`, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated.
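For illustration, a hedged sketch of such a query (the table and column names are placeholders):

```sql
-- The GROUP BY below is executed on each remote server first; the intermediate
-- aggregation states are then sent to the initiator and merged there.
SELECT CounterID, count() AS hits
FROM distributed_hits      -- a table using the Distributed engine over the `logs` cluster
GROUP BY CounterID
ORDER BY hits DESC
LIMIT 10;
```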
Instead of the database name, you can use a constant expression that returns a string. For example: `currentDatabase()`.
@ -183,9 +185,7 @@ Clusters are configured in the [server configuration file](../../../operations/c
</remote_servers>
```
Here a cluster is defined with the name `logs` that consists of two shards, each of which contains two replicas.
Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards).
Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas).
Here a cluster is defined with the name `logs` that consists of two shards, each of which contains two replicas. Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards). Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas).
Cluster names must not contain dots.
@ -198,9 +198,7 @@ The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `com
- `secure` - Whether to use a secure SSL/TLS connection. Usually also requires specifying the port (the default secure port is `9440`). The server should listen on `<tcp_port_secure>9440</tcp_port_secure>` and be configured with correct certificates.
- `compression` - Use data compression. Default value: `true`.
When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) see the [load_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting.
If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times.
This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly.
When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access); see the [load_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting. If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection fails, the next replica will be selected, and so on for all the replicas. If the connection attempt fails for all the replicas, the attempt will be repeated the same way, several times. This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly.
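A minimal sketch of choosing the load balancing policy (the value shown is just one of the supported options):

```sql
-- Prefer replicas whose host names are closest to this server's host name.
SET load_balancing = 'nearest_hostname';
```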
You can specify just one of the shards (in this case, query processing should be called remote, rather than distributed) or up to any number of shards. In each shard, you can specify from one to any number of replicas. You can specify a different number of replicas for each shard.

View File

@ -478,6 +478,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
- [input_format_csv_try_infer_numbers_from_strings](/docs/en/operations/settings/settings-formats.md/#input_format_csv_try_infer_numbers_from_strings) - Try to infer numbers from string fields during schema inference. Default value - `false`.
## CSVWithNames {#csvwithnames}

View File

@ -834,6 +834,27 @@ $$)
└──────────────┴───────────────┘
```
#### CSV settings {#csv-settings}
##### input_format_csv_try_infer_numbers_from_strings
Enabling this setting allows inferring numbers from string values.
This setting is disabled by default.
**Example:**
```sql
SET input_format_csv_try_infer_numbers_from_strings = 1;
DESC format(CSV, '"42","42.42"');
```
```response
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ c1 │ Nullable(Int64) │ │ │ │ │ │
│ c2 │ Nullable(Float64) │ │ │ │ │ │
└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
### TSV/TSKV {#tsv-tskv}
In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using
@ -1846,3 +1867,102 @@ DESC format(JSONAsString, '{"x" : 42, "y" : "Hello, World!"}') SETTINGS allow_ex
│ json │ Object('json') │ │ │ │ │ │
└──────┴────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## Schema inference modes {#schema-inference-modes}
Schema inference from the set of data files can work in 2 different modes: `default` and `union`.
The mode is controlled by the setting `schema_inference_mode`.
### Default mode {#default-schema-inference-mode}
In default mode, ClickHouse assumes that all files have the same schema and tries to infer the schema by reading files one by one until it succeeds.
Example:
Let's say we have 3 files `data1.jsonl`, `data2.jsonl` and `data3.jsonl` with the following content:
`data1.jsonl`:
```json
{"field1" : 1, "field2" : null}
{"field1" : 2, "field2" : null}
{"field1" : 3, "field2" : null}
```
`data2.jsonl`:
```json
{"field1" : 4, "field2" : "Data4"}
{"field1" : 5, "field2" : "Data5"}
{"field1" : 6, "field2" : "Data5"}
```
`data3.jsonl`:
```json
{"field1" : 7, "field2" : "Data7", "field3" : [1, 2, 3]}
{"field1" : 8, "field2" : "Data8", "field3" : [4, 5, 6]}
{"field1" : 9, "field2" : "Data9", "field3" : [7, 8, 9]}
```
Let's try to use schema inference on these 3 files:
```sql
:) DESCRIBE file('data{1,2,3}.jsonl') SETTINGS schema_inference_mode='default'
```
Result:
```text
┌─name───┬─type─────────────┐
│ field1 │ Nullable(Int64) │
│ field2 │ Nullable(String) │
└────────┴──────────────────┘
```
As you can see, the resulting schema does not contain `field3` from file `data3.jsonl`.
This happens because ClickHouse first tried to infer the schema from file `data1.jsonl` and failed (field `field2` contains only nulls),
then inferred the schema from `data2.jsonl` and succeeded, so the data from file `data3.jsonl` was never read.
### Union mode {#union-schema-inference-mode}
In union mode, ClickHouse assumes that files can have different schemas, so it infers the schemas of all files and then unions them into a common schema.
Let's say we have 3 files `data1.jsonl`, `data2.jsonl` and `data3.jsonl` with the following content:
`data1.jsonl`:
```json
{"field1" : 1}
{"field1" : 2}
{"field1" : 3}
```
`data2.jsonl`:
```json
{"field2" : "Data4"}
{"field2" : "Data5"}
{"field2" : "Data5"}
```
`data3.jsonl`:
```json
{"field3" : [1, 2, 3]}
{"field3" : [4, 5, 6]}
{"field3" : [7, 8, 9]}
```
Let's try to use schema inference on these 3 files:
```sql
:) DESCRIBE file('data{1,2,3}.jsonl') SETTINGS schema_inference_mode='union'
```
Result:
```text
┌─name───┬─type───────────────────┐
│ field1 │ Nullable(Int64) │
│ field2 │ Nullable(String) │
│ field3 │ Array(Nullable(Int64)) │
└────────┴────────────────────────┘
```
As we can see, we have all fields from all files.
Note:
- As some of the files may not contain some columns from the resulting schema, union mode is supported only for formats that support reading a subset of columns (like JSONEachRow, Parquet, TSVWithNames, etc) and won't work for other formats (like CSV, TSV, JSONCompactEachRow, etc).
- If ClickHouse cannot infer the schema from one of the files, an exception will be thrown.
- If you have a lot of files, reading the schema from all of them can take a lot of time.

View File

@ -472,6 +472,39 @@ The value 0 means that you can delete all tables without any restrictions.
``` xml
<max_table_size_to_drop>0</max_table_size_to_drop>
```
## max\_database\_num\_to\_warn {#max-database-num-to-warn}
If the number of attached databases exceeds the specified value, the ClickHouse server will add warning messages to the `system.warnings` table.
Default value: 1000
**Example**
``` xml
<max_database_num_to_warn>50</max_database_num_to_warn>
```
## max\_table\_num\_to\_warn {#max-table-num-to-warn}
If the number of attached tables exceeds the specified value, the ClickHouse server will add warning messages to the `system.warnings` table.
Default value: 5000
**Example**
``` xml
<max_table_num_to_warn>400</max_table_num_to_warn>
```
## max\_part\_num\_to\_warn {#max-part-num-to-warn}
If the number of active parts exceeds the specified value, the ClickHouse server will add warning messages to the `system.warnings` table.
Default value: 100000
**Example**
``` xml
<max_part_num_to_warn>400</max_part_num_to_warn>
```
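To see which of these thresholds have been exceeded, the recorded warnings can be inspected directly; a minimal sketch:

```sql
-- Warnings added by the server, including the *_num_to_warn messages described above.
SELECT * FROM system.warnings;
```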
## max_temporary_data_on_disk_size

View File

@ -1130,6 +1130,13 @@ Result
a 0 1971-01-01
```
## input_format_csv_try_infer_numbers_from_strings {#input_format_csv_try_infer_numbers_from_strings}
If enabled, during schema inference ClickHouse will try to infer numbers from string fields.
It can be useful if CSV data contains quoted UInt64 numbers.
Disabled by default.
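A minimal sketch of using it during schema inference (mirroring the CSV example in the formats documentation):

```sql
SET input_format_csv_try_infer_numbers_from_strings = 1;
DESC format(CSV, '"42","42.42"');
-- c1 is inferred as Nullable(Int64), c2 as Nullable(Float64)
```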
## Values format settings {#values-format-settings}
### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}

View File

@ -4349,6 +4349,8 @@ Default value: `1GiB`.
## Schema Inference settings
See [schema inference](../../interfaces/schema-inference.md#schema-inference-modes) documentation for more details.
### schema_inference_use_cache_for_file {#schema_inference_use_cache_for_file}
Enable the schema cache for schema inference in the `file` table function.
@ -4390,6 +4392,13 @@ Possible values:
Default value: 2.
### schema_inference_mode {#schema_inference_mode}
The mode of schema inference. Possible values: `default` and `union`.
See [schema inference modes](../../interfaces/schema-inference.md#schema-inference-modes) section for more details.
Default value: `default`.
## compatibility {#compatibility}
The `compatibility` setting causes ClickHouse to use the default settings of a previous version of ClickHouse, where the previous version is provided as the setting.

View File

@ -216,7 +216,6 @@ Arguments:
- `--logger.level` — Log level.
- `--ignore-error` — do not stop processing if a query failed.
- `-c`, `--config-file` — path to configuration file in same format as for ClickHouse server, by default the configuration empty.
- `--no-system-tables` — do not attach system tables.
- `--help` — arguments references for `clickhouse-local`.
- `-V`, `--version` — print version information and exit.

View File

@ -319,9 +319,9 @@ This is a relatively fast non-cryptographic hash function of average quality for
Calculates a 64-bit hash code from any type of integer.
It works faster than intHash32. Average quality.
## SHA1, SHA224, SHA256, SHA512
## SHA1, SHA224, SHA256, SHA512, SHA512_256
Calculates SHA-1, SHA-224, SHA-256, SHA-512 hash from a string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md).
Calculates SHA-1, SHA-224, SHA-256, SHA-512, SHA-512-256 hash from a string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md).
**Syntax**

View File

@ -393,40 +393,6 @@ Reverses the sequence of bytes in a string.
Reverses a sequence of Unicode code points in a string. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
## format
Format the `pattern` string with the strings listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitly given monotonically increasing numbers).
**Syntax**
```sql
format(pattern, s0, s1, …)
```
**Example**
``` sql
SELECT format('{1} {0} {1}', 'World', 'Hello')
```
```result
┌─format('{1} {0} {1}', 'World', 'Hello')─┐
│ Hello World Hello │
└─────────────────────────────────────────┘
```
With implicit numbers:
``` sql
SELECT format('{} {}', 'Hello', 'World')
```
```result
┌─format('{} {}', 'Hello', 'World')─┐
│ Hello World │
└───────────────────────────────────┘
```
## concat
Concatenates the given arguments.
@ -577,26 +543,52 @@ Like `concatWithSeparator` but assumes that `concatWithSeparator(sep, expr1, exp
A function is called injective if it returns different results for different arguments. In other words: different arguments never produce an identical result.
## substring(s, offset, length)
## substring
Returns a substring with `length` many bytes, starting at the byte at index `offset`. Character indexing starts from 1.
Returns the substring of a string `s` which starts at the specified byte index `offset`. Byte counting starts from 1. If `offset` is 0, an empty string is returned. If `offset` is negative, the substring starts `offset` characters from the end of the string, rather than from the beginning. An optional argument `length` specifies the maximum number of bytes the returned substring may have.
**Syntax**
```sql
substring(s, offset, length)
substring(s, offset[, length])
```
Aliases:
- `substr`
- `mid`
**Arguments**
- `s` — The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md)
- `offset` — The starting position of the substring in `s`. [(U)Int*](../../sql-reference/data-types/int-uint.md).
- `length` — The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional.
**Returned value**
A substring of `s` with `length` many bytes, starting at index `offset`.
Type: `String`.
**Example**
``` sql
SELECT 'database' AS db, substr(db, 5), substr(db, 5, 1)
```
Result:
```result
┌─db───────┬─substring('database', 5)─┬─substring('database', 5, 1)─┐
│ database │ base │ b │
└──────────┴──────────────────────────┴─────────────────────────────┘
```
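A short sketch of the negative `offset` behaviour described above (the expected results are shown as comments):

```sql
SELECT substring('database', -4) AS tail, substring('database', -4, 2) AS tail_start
-- tail = 'base', tail_start = 'ba'
```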
## substringUTF8
Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
## substringIndex(s, delim, count)
## substringIndex
Returns the substring of `s` before `count` occurrences of the delimiter `delim`, as in Spark or MySQL.
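For illustration, a hedged sketch of the MySQL-like behaviour just described (the expected result is shown as a comment):

```sql
SELECT substringIndex('www.clickhouse.com', '.', 2)
-- expected: 'www.clickhouse'
```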
@ -627,7 +619,7 @@ Result:
└──────────────────────────────────────────────┘
```
## substringIndexUTF8(s, delim, count)
## substringIndexUTF8
Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.

View File

@ -132,6 +132,40 @@ For more information, see [RE2](https://github.com/google/re2/blob/master/re2/re
regexpQuoteMeta(s)
```
## format
Format the `pattern` string with the values (strings, integers, etc.) listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitly given monotonically increasing numbers).
**Syntax**
```sql
format(pattern, s0, s1, …)
```
**Example**
``` sql
SELECT format('{1} {0} {1}', 'World', 'Hello')
```
```result
┌─format('{1} {0} {1}', 'World', 'Hello')─┐
│ Hello World Hello │
└─────────────────────────────────────────┘
```
With implicit numbers:
``` sql
SELECT format('{} {}', 'Hello', 'World')
```
```result
┌─format('{} {}', 'Hello', 'World')─┐
│ Hello World │
└───────────────────────────────────┘
```
## translate
Replaces characters in the string `s` using a one-to-one character mapping defined by `from` and `to` strings. `from` and `to` must be constant ASCII strings of the same size. Non-ASCII characters in the original string are not modified.

View File

@ -90,152 +90,11 @@ Views look the same as normal tables. For example, they are listed in the result
To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). Although `DROP TABLE` works for VIEWs as well.
## Live View [Experimental]
## Live View [Deprecated]
:::note
This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`.
:::
This feature is deprecated and will be removed in the future.
```sql
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
```
Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query.
Live views are triggered by insert into the innermost table specified in the query.
Live views work similarly to how a query in a distributed table works. But instead of combining partial results from different servers they combine partial result from current data with partial result from the new data. When a live view query includes a subquery then the cached partial result is only stored for the innermost subquery.
:::info
- [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table.
- Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md), [system table](../../../operations/system-tables/index.md), a [normal view](#normal), or a [materialized view](#materialized) will not trigger a live view.
- Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result or aggregations where the state of the aggregation must be preserved.
- Does not work with replicated or distributed tables where inserts are performed on different nodes.
- Can't be triggered by multiple tables.
See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view that in some cases can be used as a workaround.
:::
### Monitoring Live View Changes
You can monitor changes in the `LIVE VIEW` query result using [WATCH](../../../sql-reference/statements/watch.md) query.
```sql
WATCH [db.]live_view
```
**Example:**
```sql
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
CREATE LIVE VIEW lv AS SELECT sum(x) FROM mt;
```
Watch a live view while doing a parallel insert into the source table.
```sql
WATCH lv;
```
```bash
┌─sum(x)─┬─_version─┐
│ 1 │ 1 │
└────────┴──────────┘
┌─sum(x)─┬─_version─┐
│ 3 │ 2 │
└────────┴──────────┘
┌─sum(x)─┬─_version─┐
│ 6 │ 3 │
└────────┴──────────┘
```
```sql
INSERT INTO mt VALUES (1);
INSERT INTO mt VALUES (2);
INSERT INTO mt VALUES (3);
```
Or add [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause to just get change events.
```sql
WATCH [db.]live_view EVENTS;
```
**Example:**
```sql
WATCH lv EVENTS;
```
```bash
┌─version─┐
│ 1 │
└─────────┘
┌─version─┐
│ 2 │
└─────────┘
┌─version─┐
│ 3 │
└─────────┘
```
You can execute [SELECT](../../../sql-reference/statements/select/index.md) query on a live view in the same way as for any regular view or a table. If the query result is cached it will return the result immediately without running the stored query on the underlying tables.
```sql
SELECT * FROM [db.]live_view WHERE ...
```
### Force Live View Refresh
You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement.
### WITH REFRESH Clause
When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger.
```sql
CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ...
```
If the refresh value is not specified then the value specified by the [periodic_live_view_refresh](../../../operations/settings/settings.md#periodic-live-view-refresh) setting is used.
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv
```
```bash
┌───────────────now()─┬─_version─┐
│ 2021-02-21 08:47:05 │ 1 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 08:47:10 │ 2 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 08:47:15 │ 3 │
└─────────────────────┴──────────┘
```
```sql
WATCH lv
```
```
Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table default.lv does not exist..
```
### Live View Usage
Most common uses of live view tables include:
- Providing push notifications for query result changes to avoid polling.
- Caching results of most frequent queries to provide immediate query results.
- Watching for table changes and triggering a follow-up select queries.
- Watching metrics from system tables using periodic refresh.
**See Also**
- [ALTER LIVE VIEW](../alter/view.md#alter-live-view)
For your convenience, the old documentation is located [here](https://pastila.nl/?00f32652/fdf07272a7b54bda7e13b919264e449f.md)
## Window View [Experimental]

View File

@ -12,7 +12,7 @@ Compressed files are supported. Compression type is detected by the extension of
**Syntax**
```sql
SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [APPEND] [COMPRESSION type [LEVEL level]]
SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [APPEND | TRUNCATE] [COMPRESSION type [LEVEL level]]
```
`file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
@ -26,6 +26,7 @@ SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [APPEND] [COMPRESSION typ
- The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it.
- If `AND STDOUT` is mentioned in the query then the output that is written to the file is also displayed on standard output. If used with compression, the plaintext is displayed on standard output.
- If `APPEND` is mentioned in the query then the output is appended to an existing file. If compression is used, append cannot be used.
- When writing to a file that already exists, `APPEND` or `TRUNCATE` must be used, as in the sketch below.
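A brief sketch of the new `TRUNCATE` clause (the file name is arbitrary):

```sql
-- Overwrite out.tsv if it already exists instead of failing.
SELECT * FROM numbers(3) INTO OUTFILE 'out.tsv' TRUNCATE;
```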
**Example**

View File

@ -19,6 +19,7 @@ fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] })
- `json_str` (String) - The source string representing structured data in JSON format.
- `random_seed` (UInt64) - Manual random seed for producing stable results.
- `reuse_output` (boolean) - Reuse the output from a fuzzing process as input for the next fuzzer.
- `malform_output` (boolean) - Generate a string that cannot be parsed as a JSON object.
- `max_output_length` (UInt64) - Maximum allowable length of the generated or perturbed JSON string.
- `probability` (Float64) - The probability to fuzz a JSON field (a key-value pair). Must be within [0, 1] range.
- `max_nesting_level` (UInt64) - The maximum allowed depth of nested structures within the JSON data.
@ -84,3 +85,13 @@ SELECT * FROM fuzzJSON('{"id":1}', 1234) LIMIT 3;
{"BRjE":16137826149911306846}
{"XjKE":15076727133550123563}
```
``` sql
SELECT * FROM fuzzJSON(json_nc, json_str='{"name" : "FuzzJSON"}', random_seed=1337, malform_output=true) LIMIT 3;
```
``` text
U"name":"FuzzJSON*"SpByjZKtr2VAyHCO"falseh
{"name"keFuzzJSON, "g6vVO7TCIk":jTt^
{"DBhz":YFuzzJSON5}
```

View File

@ -45,7 +45,6 @@ $ clickhouse-local --structure "table_structure" --input-format "format_of_incom
- `--logger.level` — Log level.
- `--ignore-error` — do not stop processing if a query failed.
- `-c`, `--config-file` — path to the configuration file. By default `clickhouse-local` starts with an empty configuration. The configuration file has the same format as for the ClickHouse server, and all server configuration parameters can be used in it. Usually no configuration file is needed; if a single parameter needs to be set, this can be done with a command-line key named after the parameter.
- `--no-system-tables` — run without using system tables.
- `--help` — print help information about `clickhouse-local`.
- `-V`, `--version` — print the current version and exit.

View File

@ -45,7 +45,6 @@ clickhouse-local --structure "table_structure" --input-format "format_of_incomin
- `--logger.level` — log level.
- `--ignore-error` — do not stop processing when a query fails.
- `-c`, `--config-file` — path to a configuration file in the same format as for the ClickHouse server; by default the configuration is empty.
- `--no-system-tables` — do not attach system tables.
- `--help` — usage help information for `clickhouse-local`.
- `-V`, `--version` — print version information and exit.

View File

@ -68,6 +68,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperReadinessHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnection.cpp

View File

@ -14,6 +14,7 @@
#include <Common/assertProcessUserMatchesDataOwner.h>
#include <Common/makeSocketAddress.h>
#include <Server/waitServersToFinish.h>
#include <base/getMemoryAmount.h>
#include <base/scope_guard.h>
#include <base/safeExit.h>
#include <Poco/Net/NetException.h>
@ -32,6 +33,7 @@
#include <Server/HTTP/HTTPServer.h>
#include <Server/TCPServer.h>
#include <Server/HTTPHandlerFactory.h>
#include <Server/KeeperReadinessHandler.h>
#include "Core/Defines.h"
#include "config.h"
@ -289,6 +291,33 @@ try
if (!config().has("keeper_server"))
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Keeper configuration (<keeper_server> section) not found in config");
auto updateMemorySoftLimitInConfig = [&](Poco::Util::AbstractConfiguration & config)
{
UInt64 memory_soft_limit = 0;
if (config.has("keeper_server.max_memory_usage_soft_limit"))
{
memory_soft_limit = config.getUInt64("keeper_server.max_memory_usage_soft_limit");
}
/// if memory soft limit is not set, we will use default value
if (memory_soft_limit == 0)
{
Float64 ratio = 0.9;
if (config.has("keeper_server.max_memory_usage_soft_limit_ratio"))
ratio = config.getDouble("keeper_server.max_memory_usage_soft_limit_ratio");
size_t physical_server_memory = getMemoryAmount();
if (ratio > 0 && physical_server_memory > 0)
{
memory_soft_limit = static_cast<UInt64>(physical_server_memory * ratio);
config.setUInt64("keeper_server.max_memory_usage_soft_limit", memory_soft_limit);
}
}
LOG_INFO(log, "keeper_server.max_memory_usage_soft_limit is set to {}", formatReadableSizeWithBinarySuffix(memory_soft_limit));
};
updateMemorySoftLimitInConfig(config());
std::string path;
if (config().has("keeper_server.storage_path"))
@ -328,6 +357,13 @@ try
config().getUInt("max_thread_pool_free_size", 1000),
config().getUInt("thread_pool_queue_size", 10000)
);
/// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed).
SCOPE_EXIT({
Stopwatch watch;
LOG_INFO(log, "Waiting for background threads");
GlobalThreadPool::instance().shutdown();
LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds());
});
static ServerErrorHandler error_handler;
Poco::ErrorHandler::set(&error_handler);
@ -459,6 +495,29 @@ try
std::make_unique<HTTPServer>(
std::move(my_http_context), createPrometheusMainHandlerFactory(*this, config_getter(), async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
});
/// HTTP control endpoints
port_name = "keeper_server.http_control.port";
createServer(listen_host, port_name, listen_try, [&](UInt16 port) mutable
{
auto my_http_context = httpContext();
Poco::Timespan my_keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0);
Poco::Net::HTTPServerParams::Ptr my_http_params = new Poco::Net::HTTPServerParams;
my_http_params->setTimeout(my_http_context->getReceiveTimeout());
my_http_params->setKeepAliveTimeout(my_keep_alive_timeout);
Poco::Net::ServerSocket socket;
auto address = socketBindListen(socket, listen_host, port);
socket.setReceiveTimeout(my_http_context->getReceiveTimeout());
socket.setSendTimeout(my_http_context->getSendTimeout());
servers->emplace_back(
listen_host,
port_name,
"HTTP Control: http://" + address.toString(),
std::make_unique<HTTPServer>(
std::move(my_http_context), createKeeperHTTPControlMainHandlerFactory(config_getter(), global_context->getKeeperDispatcher(), "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params)
);
});
}
for (auto & server : *servers)
@ -492,6 +551,8 @@ try
{
updateLevels(*config, logger());
updateMemorySoftLimitInConfig(*config);
if (config->has("keeper_server"))
global_context->updateKeeperConfiguration(*config);

View File

@ -744,7 +744,7 @@ void LocalServer::processConfig()
LOG_DEBUG(log, "Loading metadata from {}", path);
auto startup_system_tasks = loadMetadataSystem(global_context);
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachSystemTablesLocal</* lazy= */ true>(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
waitLoad(TablesLoaderForegroundPoolId, startup_system_tasks);
@ -761,9 +761,9 @@ void LocalServer::processConfig()
LOG_DEBUG(log, "Loaded metadata.");
}
else if (!config().has("no-system-tables"))
else
{
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachSystemTablesLocal</* lazy= */ true>(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
}
@ -842,7 +842,6 @@ void LocalServer::addOptions(OptionsDescription & options_description)
("logger.log", po::value<std::string>(), "Log file name")
("logger.level", po::value<std::string>(), "Log level")
("no-system-tables", "do not attach system tables (better startup time)")
("path", po::value<std::string>(), "Storage path")
("only-system-tables", "attach only system tables from specified path")
("top_level_domains_path", po::value<std::string>(), "Path to lists with custom TLDs")
@ -871,8 +870,6 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
config().setString("table-file", options["file"].as<std::string>());
if (options.count("structure"))
config().setString("table-structure", options["structure"].as<std::string>());
if (options.count("no-system-tables"))
config().setBool("no-system-tables", true);
if (options.count("only-system-tables"))
config().setBool("only-system-tables", true);
if (options.count("database"))

View File

@ -93,6 +93,7 @@
#include <Server/ProxyV1HandlerFactory.h>
#include <Server/TLSHandlerFactory.h>
#include <Server/ProtocolServerAdapter.h>
#include <Server/KeeperReadinessHandler.h>
#include <Server/HTTP/HTTPServer.h>
#include <Interpreters/AsynchronousInsertQueue.h>
#include <Core/ServerSettings.h>
@ -658,6 +659,11 @@ try
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision());
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());
Poco::ThreadPool server_pool(3, server_settings.max_connections);
std::mutex servers_lock;
std::vector<ProtocolServerAdapter> servers;
std::vector<ProtocolServerAdapter> servers_to_start_before_tables;
/** Context contains all that query execution is dependent:
* settings, available functions, data types, aggregate functions, databases, ...
*/
@ -698,6 +704,68 @@ try
server_settings.max_thread_pool_size,
server_settings.max_thread_pool_free_size,
server_settings.thread_pool_queue_size);
/// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed).
SCOPE_EXIT({
Stopwatch watch;
LOG_INFO(log, "Waiting for background threads");
GlobalThreadPool::instance().shutdown();
LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds());
});
/// NOTE: global context should be destroyed *before* GlobalThreadPool::shutdown()
/// Otherwise GlobalThreadPool::shutdown() will hang, since Context holds some threads.
SCOPE_EXIT({
/** Ask to cancel background jobs all table engines,
* and also query_log.
* It is important to do early, not in destructor of Context, because
* table engines could use Context on destroy.
*/
LOG_INFO(log, "Shutting down storages.");
global_context->shutdown();
LOG_DEBUG(log, "Shut down storages.");
if (!servers_to_start_before_tables.empty())
{
LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish.");
size_t current_connections = 0;
{
std::lock_guard lock(servers_lock);
for (auto & server : servers_to_start_before_tables)
{
server.stop();
current_connections += server.currentConnections();
}
}
if (current_connections)
LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections);
else
LOG_INFO(log, "Closed all listening sockets.");
if (current_connections > 0)
current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, server_settings.shutdown_wait_unfinished);
if (current_connections)
LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections);
else
LOG_INFO(log, "Closed connections to servers for tables.");
}
global_context->shutdownKeeperDispatcher();
/// Wait server pool to avoid use-after-free of destroyed context in the handlers
server_pool.joinAll();
/** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.
* At this moment, no one could own shared part of Context.
*/
global_context.reset();
shared_context.reset();
LOG_DEBUG(log, "Destroyed global context.");
});
#if USE_AZURE_BLOB_STORAGE
/// It makes sense to deinitialize libxml after joining of all threads
@ -756,10 +824,6 @@ try
}
}
Poco::ThreadPool server_pool(3, server_settings.max_connections);
std::mutex servers_lock;
std::vector<ProtocolServerAdapter> servers;
std::vector<ProtocolServerAdapter> servers_to_start_before_tables;
/// This object will periodically calculate some metrics.
ServerAsynchronousMetrics async_metrics(
global_context,
@ -1282,6 +1346,9 @@ try
global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);
global_context->setMaxTableNumToWarn(server_settings_.max_table_num_to_warn);
global_context->setMaxDatabaseNumToWarn(server_settings_.max_database_num_to_warn);
global_context->setMaxPartNumToWarn(server_settings_.max_part_num_to_warn);
ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited;
if (server_settings_.concurrent_threads_soft_limit_num > 0 && server_settings_.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
@ -1491,6 +1558,34 @@ try
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
#endif
});
/// HTTP control endpoints
port_name = "keeper_server.http_control.port";
createServer(config(), listen_host, port_name, listen_try, /* start_server: */ false,
servers_to_start_before_tables,
[&](UInt16 port) -> ProtocolServerAdapter
{
auto http_context = httpContext();
Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0);
Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
http_params->setTimeout(http_context->getReceiveTimeout());
http_params->setKeepAliveTimeout(keep_alive_timeout);
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config(), socket, listen_host, port);
socket.setReceiveTimeout(http_context->getReceiveTimeout());
socket.setSendTimeout(http_context->getSendTimeout());
return ProtocolServerAdapter(
listen_host,
port_name,
"HTTP Control: http://" + address.toString(),
std::make_unique<HTTPServer>(
std::move(http_context),
createKeeperHTTPControlMainHandlerFactory(
config_getter(),
global_context->getKeeperDispatcher(),
"KeeperHTTPControlHandler-factory"), server_pool, socket, http_params));
});
}
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination.");
@ -1601,60 +1696,6 @@ try
/// try set up encryption. There are some errors in config, error will be printed and server wouldn't start.
CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs");
SCOPE_EXIT({
async_metrics.stop();
/** Ask to cancel background jobs all table engines,
* and also query_log.
* It is important to do early, not in destructor of Context, because
* table engines could use Context on destroy.
*/
LOG_INFO(log, "Shutting down storages.");
global_context->shutdown();
LOG_DEBUG(log, "Shut down storages.");
if (!servers_to_start_before_tables.empty())
{
LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish.");
size_t current_connections = 0;
{
std::lock_guard lock(servers_lock);
for (auto & server : servers_to_start_before_tables)
{
server.stop();
current_connections += server.currentConnections();
}
}
if (current_connections)
LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections);
else
LOG_INFO(log, "Closed all listening sockets.");
if (current_connections > 0)
current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, server_settings.shutdown_wait_unfinished);
if (current_connections)
LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections);
else
LOG_INFO(log, "Closed connections to servers for tables.");
global_context->shutdownKeeperDispatcher();
}
/// Wait server pool to avoid use-after-free of destroyed context in the handlers
server_pool.joinAll();
/** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.
* At this moment, no one could own shared part of Context.
*/
global_context.reset();
shared_context.reset();
LOG_DEBUG(log, "Destroyed global context.");
});
/// DNSCacheUpdater uses BackgroundSchedulePool which lives in shared context
/// and thus this object must be created after the SCOPE_EXIT object where shared
/// context is destroyed.

View File

@ -14,6 +14,10 @@ macro(configure_rustc)
set(RUST_CFLAGS "${RUST_CFLAGS} --sysroot ${CMAKE_SYSROOT}")
endif()
if (USE_MUSL)
set(RUST_CXXFLAGS "${RUST_CXXFLAGS} -D_LIBCPP_HAS_MUSL_LIBC=1")
endif ()
if(CCACHE_EXECUTABLE MATCHES "/sccache$")
message(STATUS "Using RUSTC_WRAPPER: ${CCACHE_EXECUTABLE}")
set(RUSTCWRAPPER "rustc-wrapper = \"${CCACHE_EXECUTABLE}\"")

View File

@ -110,7 +110,7 @@ public:
}
}
}
else
else if (row_begin < row_end)
{
size_t pos = First ? row_begin : row_end - 1;
add(place, columns, pos, arena);

View File

@ -254,11 +254,20 @@ public:
if (it != merged_maps.end())
{
for (size_t col = 0; col < values_types.size(); ++col)
{
if (!elem.second[col].isNull())
applyVisitor(Visitor(elem.second[col]), it->second[col]);
{
if (it->second[col].isNull())
it->second[col] = elem.second[col];
else
applyVisitor(Visitor(elem.second[col]), it->second[col]);
}
}
}
else
{
merged_maps[elem.first] = elem.second;
}
}
}

View File

@ -549,8 +549,10 @@ public:
auto to = std::lower_bound(offsets.begin(), offsets.end(), row_end) - offsets.begin() + 1;
size_t num_defaults = (row_end - row_begin) - (to - from);
static_cast<const Derived *>(this)->addBatchSinglePlace(from, to, place, &values, arena, -1);
static_cast<const Derived *>(this)->addManyDefaults(place, &values, num_defaults, arena);
if (from < to)
static_cast<const Derived *>(this)->addBatchSinglePlace(from, to, place, &values, arena, -1);
if (num_defaults > 0)
static_cast<const Derived *>(this)->addManyDefaults(place, &values, num_defaults, arena);
}
void addBatchSinglePlaceNotNull( /// NOLINT

View File

@ -184,12 +184,12 @@ BackupCoordinationRemote::BackupCoordinationRemote(
if (my_is_internal)
{
String alive_node_path = my_zookeeper_path + "/stage/alive|" + my_current_host;
auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
if (code == Coordination::Error::ZNODEEXISTS)
zk->handleEphemeralNodeExistenceNoFailureInjection(alive_node_path, "");
else if (code != Coordination::Error::ZOK)
throw zkutil::KeeperException::fromPath(code, alive_node_path);
/// Delete the ephemeral node from the previous connection so we don't have to wait for keeper to do it automatically.
zk->tryRemove(alive_node_path);
zk->createAncestors(alive_node_path);
zk->create(alive_node_path, "", zkutil::CreateMode::Ephemeral);
}
})
{

View File

@ -60,12 +60,6 @@ void BackupCoordinationStageSync::set(const String & current_host, const String
}
else
{
/// Make an ephemeral node so the initiator can track if the current host is still working.
String alive_node_path = zookeeper_path + "/alive|" + current_host;
auto code = zookeeper->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNODEEXISTS)
throw zkutil::KeeperException::fromPath(code, alive_node_path);
zookeeper->createIfNotExists(zookeeper_path + "/started|" + current_host, "");
zookeeper->createIfNotExists(zookeeper_path + "/current|" + current_host + "|" + new_stage, message);
}
@ -106,39 +100,36 @@ Strings BackupCoordinationStageSync::waitFor(const Strings & all_hosts, const St
namespace
{
struct UnreadyHostState
struct UnreadyHost
{
String host;
bool started = false;
bool alive = false;
};
}
struct BackupCoordinationStageSync::State
{
Strings results;
std::map<String, UnreadyHostState> unready_hosts;
std::optional<Strings> results;
std::optional<std::pair<String, Exception>> error;
std::optional<String> host_terminated;
std::optional<String> disconnected_host;
std::optional<UnreadyHost> unready_host;
};
BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState(
const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const
WithRetries::RetriesControlHolder & retries_control_holder,
const Strings & zk_nodes,
const Strings & all_hosts,
const String & stage_to_wait) const
{
auto zookeeper = retries_control_holder.faulty_zookeeper;
auto & retries_ctl = retries_control_holder.retries_ctl;
std::unordered_set<std::string_view> zk_nodes_set{zk_nodes.begin(), zk_nodes.end()};
State state;
if (zk_nodes_set.contains("error"))
{
String errors;
{
auto holder = with_retries.createRetriesControlHolder("readCurrentState");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zookeeper);
errors = zookeeper->get(zookeeper_path + "/error");
});
}
String errors = zookeeper->get(zookeeper_path + "/error");
ReadBufferFromOwnString buf{errors};
String host;
readStringBinary(host, buf);
@ -146,64 +137,50 @@ BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState
return state;
}
std::optional<UnreadyHost> unready_host;
for (const auto & host : all_hosts)
{
if (!zk_nodes_set.contains("current|" + host + "|" + stage_to_wait))
{
UnreadyHostState unready_host_state;
const String started_node_name = "started|" + host;
const String alive_node_name = "alive|" + host;
const String alive_node_path = zookeeper_path + "/" + alive_node_name;
unready_host_state.started = zk_nodes_set.contains(started_node_name);
/// Because we do retries everywhere we can't fully rely on ephemeral nodes anymore.
/// Though we recreate "alive" node when reconnecting it might be not enough and race condition is possible.
/// And everything we can do here - just retry.
/// In worst case when we won't manage to see the alive node for a long time we will just abort the backup.
unready_host_state.alive = zk_nodes_set.contains(alive_node_name);
if (!unready_host_state.alive)
bool started = zk_nodes_set.contains(started_node_name);
bool alive = zk_nodes_set.contains(alive_node_name);
if (!alive)
{
LOG_TRACE(log, "Seems like host ({}) is dead. Will retry the check to confirm", host);
auto holder = with_retries.createRetriesControlHolder("readCurrentState::checkAliveNode");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zookeeper);
if (zookeeper->existsNoFailureInjection(alive_node_path))
{
unready_host_state.alive = true;
return;
}
// Retry with backoff. We also check whether it is last retry or no, because we won't to rethrow an exception.
if (!holder.retries_ctl.isLastRetry())
holder.retries_ctl.setKeeperError(Coordination::Error::ZNONODE, "There is no alive node for host {}. Will retry", host);
});
/// If the "alive" node doesn't exist then we don't have connection to the corresponding host.
/// This node is ephemeral so probably it will be recreated soon. We use zookeeper retries to wait.
/// In worst case when we won't manage to see the alive node for a long time we will just abort the backup.
String message;
if (started)
message = fmt::format("Lost connection to host {}", host);
else
message = fmt::format("No connection to host {} yet", host);
if (!retries_ctl.isLastRetry())
message += ", will retry";
retries_ctl.setUserError(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, message);
state.disconnected_host = host;
return state;
}
LOG_TRACE(log, "Host ({}) appeared to be {}", host, unready_host_state.alive ? "alive" : "dead");
state.unready_hosts.emplace(host, unready_host_state);
if (!unready_host_state.alive && unready_host_state.started && !state.host_terminated)
state.host_terminated = host;
if (!unready_host)
unready_host.emplace(UnreadyHost{.host = host, .started = started});
}
}
if (state.host_terminated || !state.unready_hosts.empty())
return state;
auto holder = with_retries.createRetriesControlHolder("waitImpl::collectStagesToWait");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
if (unready_host)
{
with_retries.renewZooKeeper(zookeeper);
Strings results;
state.unready_host = std::move(unready_host);
return state;
}
for (const auto & host : all_hosts)
results.emplace_back(zookeeper->get(zookeeper_path + "/current|" + host + "|" + stage_to_wait));
state.results = std::move(results);
});
Strings results;
for (const auto & host : all_hosts)
results.emplace_back(zookeeper->get(zookeeper_path + "/current|" + host + "|" + stage_to_wait));
state.results = std::move(results);
return state;
}
@ -229,7 +206,7 @@ Strings BackupCoordinationStageSync::waitImpl(
auto watch = std::make_shared<Poco::Event>();
Strings zk_nodes;
{
auto holder = with_retries.createRetriesControlHolder("waitImpl::getChildren");
auto holder = with_retries.createRetriesControlHolder("waitImpl");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
@ -237,17 +214,23 @@ Strings BackupCoordinationStageSync::waitImpl(
watch->reset();
/// Get zk nodes and subscribe on their changes.
zk_nodes = zookeeper->getChildren(zookeeper_path, nullptr, watch);
/// Read the current state of zk nodes.
state = readCurrentState(holder, zk_nodes, all_hosts, stage_to_wait);
});
}
/// Read and analyze the current state of zk nodes.
state = readCurrentState(zk_nodes, all_hosts, stage_to_wait);
if (state.error || state.host_terminated || state.unready_hosts.empty())
break; /// Error happened or everything is ready.
/// Analyze the current state of zk nodes.
chassert(state.results || state.error || state.disconnected_host || state.unready_host);
/// Log that we will wait
const auto & unready_host = state.unready_hosts.begin()->first;
LOG_INFO(log, "Waiting on ZooKeeper watch for any node to be changed (currently waiting for host {})", unready_host);
if (state.results || state.error || state.disconnected_host)
break; /// Everything is ready or error happened.
/// Log what we will wait.
const auto & unready_host = *state.unready_host;
LOG_INFO(log, "Waiting on ZooKeeper watch for any node to be changed (currently waiting for host {}{})",
unready_host.host,
(!unready_host.started ? " which didn't start the operation yet" : ""));
/// Wait until `watch_callback` is called by ZooKeeper meaning that zk nodes have changed.
{
@ -270,23 +253,23 @@ Strings BackupCoordinationStageSync::waitImpl(
state.error->second.rethrow();
/// Another host terminated without errors.
if (state.host_terminated)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Host {} suddenly stopped working", *state.host_terminated);
if (state.disconnected_host)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "No connection to host {}", *state.disconnected_host);
/// Something's unready, timeout is probably not enough.
if (!state.unready_hosts.empty())
if (state.unready_host)
{
const auto & [unready_host, unready_host_state] = *state.unready_hosts.begin();
const auto & unready_host = *state.unready_host;
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Waited for host {} too long (> {}){}",
unready_host,
unready_host.host,
to_string(*timeout),
unready_host_state.started ? "" : ": Operation didn't start");
unready_host.started ? "" : ": Operation didn't start");
}
LOG_TRACE(log, "Everything is Ok. All hosts achieved stage {}", stage_to_wait);
return state.results;
return std::move(*state.results);
}
}

View File

@ -29,7 +29,7 @@ private:
void createRootNodes();
struct State;
State readCurrentState(const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const;
State readCurrentState(WithRetries::RetriesControlHolder & retries_control_holder, const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const;
Strings waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const;

View File

@ -43,12 +43,12 @@ RestoreCoordinationRemote::RestoreCoordinationRemote(
if (my_is_internal)
{
String alive_node_path = my_zookeeper_path + "/stage/alive|" + my_current_host;
auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
if (code == Coordination::Error::ZNODEEXISTS)
zk->handleEphemeralNodeExistenceNoFailureInjection(alive_node_path, "");
else if (code != Coordination::Error::ZOK)
throw zkutil::KeeperException::fromPath(code, alive_node_path);
/// Delete the ephemeral node from the previous connection so we don't have to wait for keeper to do it automatically.
zk->tryRemove(alive_node_path);
zk->createAncestors(alive_node_path);
zk->create(alive_node_path, "", zkutil::CreateMode::Ephemeral);
}
})
{

View File

@ -7,7 +7,7 @@
#include <cassert>
#include <cstring>
#include <unistd.h>
#include <sys/select.h>
#include <poll.h>
#include <sys/time.h>
#include <sys/types.h>
@ -27,11 +27,8 @@ void trim(String & s)
/// Allows delaying the start of query execution until the entirety of query is inserted.
bool hasInputData()
{
timeval timeout = {0, 0};
fd_set fds{};
FD_ZERO(&fds);
FD_SET(STDIN_FILENO, &fds);
return select(1, &fds, nullptr, nullptr, &timeout) == 1;
pollfd fd{STDIN_FILENO, POLLIN, 0};
return poll(&fd, 1, 0) == 1;
}
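Note: the hunk above swaps the select()-based stdin probe for poll(), which is not limited by FD_SETSIZE and needs no fd_set bookkeeping. Below is a standalone sketch of the same zero-timeout check, written slightly more explicitly (inspecting revents rather than relying on the return value alone); it is an illustration, not the project's helper.

#include <poll.h>
#include <unistd.h>

/// Returns true if stdin has data ready to read, without blocking (zero timeout).
static bool stdinHasPendingInput()
{
    pollfd fd{STDIN_FILENO, POLLIN, 0};
    const int ready = poll(&fd, 1, /* timeout_ms = */ 0);
    /// poll() returns the number of descriptors with pending events;
    /// checking revents confirms that the event is actually POLLIN.
    return ready == 1 && (fd.revents & POLLIN);
}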
struct NoCaseCompare

View File

@ -212,6 +212,8 @@
M(PartsCommitted, "Deprecated. See PartsActive.") \
M(PartsPreActive, "The part is in data_parts, but not used for SELECTs.") \
M(PartsActive, "Active data part, used by current and upcoming SELECTs.") \
M(AttachedDatabase, "Active database, used by current and upcoming SELECTs.") \
M(AttachedTable, "Active table, used by current and upcoming SELECTs.") \
M(PartsOutdated, "Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.") \
M(PartsDeleting, "Not active data part with identity refcounter, it is deleting right now by a cleaner.") \
M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \

View File

@ -238,6 +238,7 @@
M(DictCacheLockReadNs, "Number of nanoseconds spend in waiting for read lock to lookup the data for the dictionaries of 'cache' types.") \
\
M(DistributedSyncInsertionTimeoutExceeded, "A timeout has exceeded while waiting for shards during synchronous insertion into a Distributed table (with 'distributed_foreground_insert' = 1)") \
M(DistributedAsyncInsertionFailures, "Number of failures for asynchronous insertion into a Distributed table (with 'distributed_foreground_insert' = 0)") \
M(DataAfterMergeDiffersFromReplica, R"(
Number of times data after merge is not byte-identical to the data on another replicas. There could be several reasons:
1. Using newer version of compression library after server update.
@ -461,7 +462,8 @@ The server successfully detected this situation and will download merged part fr
M(ReadBufferSeekCancelConnection, "Number of seeks which lead to new connection (s3, http)") \
\
M(SleepFunctionCalls, "Number of times a sleep function (sleep, sleepEachRow) has been called.") \
M(SleepFunctionMicroseconds, "Time spent sleeping due to a sleep function call.") \
M(SleepFunctionMicroseconds, "Time set to sleep in a sleep function (sleep, sleepEachRow).") \
M(SleepFunctionElapsedMicroseconds, "Time spent sleeping in a sleep function (sleep, sleepEachRow).") \
\
M(ThreadPoolReaderPageCacheHit, "Number of times the read inside ThreadPoolReader was done from page cache.") \
M(ThreadPoolReaderPageCacheHitBytes, "Number of bytes read inside ThreadPoolReader when it was done from page cache.") \

View File

@ -3,6 +3,8 @@
#include <Common/Exception.h>
#include <Common/CurrentMetrics.h>
#include <Common/ProfileEvents.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
namespace ProfileEvents
@ -155,25 +157,34 @@ RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id, const std::c
if (type == Type::Write)
{
/// Always add a group for a writer (writes are never performed simultaneously).
writers_queue.emplace_back(type); /// SM1: may throw (nothing to roll back)
}
else if (readers_queue.empty() ||
(rdlock_owner == readers_queue.begin() && readers_queue.size() == 1 && !writers_queue.empty()))
else
{
readers_queue.emplace_back(type); /// SM1: may throw (nothing to roll back)
/// We don't always add a group to readers_queue here because multiple readers can use the same group.
/// We can reuse the last group if the last group didn't get ownership yet,
/// or even if it got ownership but there are no writers waiting in writers_queue.
bool can_use_last_group = !readers_queue.empty() && (!readers_queue.back().ownership || writers_queue.empty());
if (!can_use_last_group)
readers_queue.emplace_back(type); /// SM1: may throw (nothing to roll back)
}
GroupsContainer::iterator it_group =
(type == Type::Write) ? std::prev(writers_queue.end()) : std::prev(readers_queue.end());
/// Lock is free to acquire
if (rdlock_owner == readers_queue.end() && wrlock_owner == writers_queue.end())
{
/// Set `rdlock_owner` or `wrlock_owner` and make it owner.
(type == Read ? rdlock_owner : wrlock_owner) = it_group; /// SM2: nothrow
grantOwnership(it_group);
}
else
{
/// Wait until our group becomes the lock owner
const auto predicate = [&] () { return it_group == (type == Read ? rdlock_owner : wrlock_owner); };
const auto predicate = [&] () { return it_group->ownership; };
if (lock_deadline_tp == std::chrono::time_point<std::chrono::steady_clock>::max())
{
@ -193,15 +204,20 @@ RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id, const std::c
/// Rollback(SM1): nothrow
if (it_group->requests == 0)
{
/// When WRITE lock fails, we need to notify next read that is waiting,
/// to avoid handing request, hence next=true.
dropOwnerGroupAndPassOwnership(it_group, /* next= */ true);
((type == Read) ? readers_queue : writers_queue).erase(it_group);
}
/// While we were waiting for this write lock (which has just failed) more readers could start waiting,
/// we need to wake up them now.
if ((rdlock_owner != readers_queue.end()) && writers_queue.empty())
grantOwnershipToAllReaders();
return nullptr;
}
}
}
/// Our group must be an owner here.
chassert(it_group->ownership);
if (request_has_query_id)
{
try
@ -216,7 +232,7 @@ RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id, const std::c
/// Methods std::list<>::emplace_back() and std::unordered_map<>::emplace() provide strong exception safety
/// We only need to roll back the changes to these objects: owner_queries and the readers/writers queue
if (it_group->requests == 0)
dropOwnerGroupAndPassOwnership(it_group, /* next= */ false); /// Rollback(SM1): nothrow
dropOwnerGroupAndPassOwnership(it_group); /// Rollback(SM1): nothrow
throw;
}
@ -237,19 +253,28 @@ RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id, const std::c
* it is guaranteed that all three steps have been executed successfully and the resulting state is consistent.
* With the mutex locked the order of steps to restore the lock's state can be arbitrary
*
* We do not employ try-catch: if something bad happens, there is nothing we can do =(
* We do not employ try-catch: if something bad happens and chassert() is disabled, there is nothing we can do
* (we can't throw an exception here because RWLockImpl::unlock() is called from the destructor ~LockHolderImpl).
*/
void RWLockImpl::unlock(GroupsContainer::iterator group_it, const String & query_id) noexcept
{
std::lock_guard state_lock(internal_state_mtx);
/// All of these are Undefined behavior and nothing we can do!
if (rdlock_owner == readers_queue.end() && wrlock_owner == writers_queue.end())
/// Our group must be an owner here.
if (!group_it->ownership)
{
chassert(false && "RWLockImpl::unlock() is called for a non-owner group");
return;
if (rdlock_owner != readers_queue.end() && group_it != rdlock_owner)
return;
if (wrlock_owner != writers_queue.end() && group_it != wrlock_owner)
}
/// Check consistency.
if ((group_it->type == Read)
? !(rdlock_owner != readers_queue.end() && wrlock_owner == writers_queue.end())
: !(wrlock_owner != writers_queue.end() && rdlock_owner == readers_queue.end() && group_it == wrlock_owner))
{
chassert(false && "RWLockImpl::unlock() found the rwlock inconsistent");
return;
}
/// If query_id is not empty it must be listed in parent->owner_queries
if (query_id != NO_QUERY)
@ -264,12 +289,26 @@ void RWLockImpl::unlock(GroupsContainer::iterator group_it, const String & query
/// If we are the last remaining referrer, remove this QNode and notify the next one
if (--group_it->requests == 0) /// SM: nothrow
dropOwnerGroupAndPassOwnership(group_it, /* next= */ false);
dropOwnerGroupAndPassOwnership(group_it);
}
void RWLockImpl::dropOwnerGroupAndPassOwnership(GroupsContainer::iterator group_it, bool next) noexcept
void RWLockImpl::dropOwnerGroupAndPassOwnership(GroupsContainer::iterator group_it) noexcept
{
/// All readers with ownership must finish before switching to write phase.
/// Such readers have iterators from `readers_queue.begin()` to `rdlock_owner`, so if `rdlock_owner` is equal to `readers_queue.begin()`
/// that means there is only one reader with ownership left in the readers_queue and we can proceed with the generic procedure.
if ((group_it->type == Read) && (rdlock_owner != readers_queue.begin()) && (rdlock_owner != readers_queue.end()))
{
if (rdlock_owner == group_it)
--rdlock_owner;
readers_queue.erase(group_it);
/// If there are no writers waiting in writers_queue then we can wake up other readers.
if (writers_queue.empty())
grantOwnershipToAllReaders();
return;
}
rdlock_owner = readers_queue.end();
wrlock_owner = writers_queue.end();
@ -278,42 +317,86 @@ void RWLockImpl::dropOwnerGroupAndPassOwnership(GroupsContainer::iterator group_
readers_queue.erase(group_it);
/// Prepare next phase
if (!writers_queue.empty())
{
wrlock_owner = writers_queue.begin();
}
else
{
rdlock_owner = readers_queue.begin();
}
}
else
{
writers_queue.erase(group_it);
/// Prepare next phase
if (!readers_queue.empty())
{
if (next && readers_queue.size() > 1)
{
rdlock_owner = std::next(readers_queue.begin());
}
else
{
rdlock_owner = readers_queue.begin();
}
}
rdlock_owner = readers_queue.begin();
else
{
wrlock_owner = writers_queue.begin();
}
}
if (rdlock_owner != readers_queue.end())
{
rdlock_owner->cv.notify_all();
grantOwnershipToAllReaders();
}
else if (wrlock_owner != writers_queue.end())
{
wrlock_owner->cv.notify_one();
grantOwnership(wrlock_owner);
}
}
void RWLockImpl::grantOwnership(GroupsContainer::iterator group_it) noexcept
{
if (!group_it->ownership)
{
group_it->ownership = true;
group_it->cv.notify_all();
}
}
void RWLockImpl::grantOwnershipToAllReaders() noexcept
{
if (rdlock_owner != readers_queue.end())
{
size_t num_new_owners = 0;
for (;;)
{
if (!rdlock_owner->ownership)
++num_new_owners;
grantOwnership(rdlock_owner);
if (std::next(rdlock_owner) == readers_queue.end())
break;
++rdlock_owner;
}
/// There cannot be more than one reader group that is not an owner.
/// (Because we add a new reader group only if the last reader group is already an owner - see the `can_use_last_group` variable.)
chassert(num_new_owners <= 1);
}
}
std::unordered_map<String, size_t> RWLockImpl::getOwnerQueryIds() const
{
std::lock_guard lock{internal_state_mtx};
return owner_queries;
}
String RWLockImpl::getOwnerQueryIdsDescription() const
{
auto map = getOwnerQueryIds();
WriteBufferFromOwnString out;
bool need_comma = false;
for (const auto & [query_id, num_owners] : map)
{
if (need_comma)
out << ", ";
out << query_id;
if (num_owners != 1)
out << " (" << num_owners << ")";
need_comma = true;
}
return out.str();
}
}

View File

@ -62,35 +62,42 @@ public:
inline static const String NO_QUERY = String();
inline static const auto default_locking_timeout_ms = std::chrono::milliseconds(120000);
/// Returns all query_ids that own locks (both read and write) right now.
/// !! These functions are for debugging and logging purposes only, DO NOT use them for synchronization!
std::unordered_map<String, size_t> getOwnerQueryIds() const;
String getOwnerQueryIdsDescription() const;
private:
/// Group of locking requests that should be granted simultaneously
/// i.e. one or several readers or a single writer
struct Group
{
const Type type;
size_t requests;
size_t requests = 0;
bool ownership = false; /// whether this group got ownership (which means `cv` is notified and the locking requests should stop waiting)
std::condition_variable cv; /// all locking requests of the group wait on this condvar
explicit Group(Type type_) : type{type_}, requests{0} {}
explicit Group(Type type_) : type{type_} {}
};
using GroupsContainer = std::list<Group>;
using OwnerQueryIds = std::unordered_map<String, size_t>;
using OwnerQueryIds = std::unordered_map<String /* query_id */, size_t /* num_owners */>;
mutable std::mutex internal_state_mtx;
GroupsContainer readers_queue;
GroupsContainer writers_queue;
GroupsContainer::iterator rdlock_owner{readers_queue.end()}; /// equals to readers_queue.begin() in read phase
/// or readers_queue.end() otherwise
GroupsContainer::iterator rdlock_owner{readers_queue.end()}; /// last group with ownership in readers_queue in read phase
/// or readers_queue.end() in writer phase
GroupsContainer::iterator wrlock_owner{writers_queue.end()}; /// equals to writers_queue.begin() in write phase
/// or writers_queue.end() otherwise
/// or writers_queue.end() in read phase
OwnerQueryIds owner_queries;
RWLockImpl() = default;
void unlock(GroupsContainer::iterator group_it, const String & query_id) noexcept;
/// @param next - notify next after begin, used on writer lock failures
void dropOwnerGroupAndPassOwnership(GroupsContainer::iterator group_it, bool next) noexcept;
void dropOwnerGroupAndPassOwnership(GroupsContainer::iterator group_it) noexcept;
void grantOwnership(GroupsContainer::iterator group_it) noexcept;
void grantOwnershipToAllReaders() noexcept;
};
}
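A hypothetical usage sketch of the interface declared above (the include path and the null-holder convention are assumptions drawn from this header and from the tests shown further below): a shared lock and then an exclusive lock are taken with a timeout; a null holder means the timeout expired, and destroying a holder passes ownership to the next waiting group.

#include <chrono>
#include <Common/RWLock.h>   /// include path assumed

void rwLockUsageSketch()
{
    auto rw_lock = RWLockImpl::create();

    /// Shared (read) lock for a query; nullptr means the 100 ms timeout expired.
    if (auto read_holder = rw_lock->getLock(RWLockImpl::Read, "query-1", std::chrono::milliseconds(100)))
    {
        /// ... read under the lock ...
    }   /// the holder's destructor unlocks and wakes the next waiting group

    /// Exclusive (write) lock; granted only after all reader groups release ownership.
    if (auto write_holder = rw_lock->getLock(RWLockImpl::Write, "query-2", std::chrono::milliseconds(100)))
    {
        /// ... write under the lock ...
    }
}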

View File

@ -500,3 +500,10 @@ GlobalThreadPool & GlobalThreadPool::instance()
return *the_instance;
}
void GlobalThreadPool::shutdown()
{
if (the_instance)
{
the_instance->finalize();
}
}

View File

@ -109,6 +109,8 @@ public:
void addOnDestroyCallback(OnDestroyCallback && callback);
private:
friend class GlobalThreadPool;
mutable std::mutex mutex;
std::condition_variable job_finished;
std::condition_variable new_job_or_shutdown;
@ -205,6 +207,7 @@ class GlobalThreadPool : public FreeThreadPool, private boost::noncopyable
public:
static void initialize(size_t max_threads = 10000, size_t max_free_threads = 1000, size_t queue_size = 10000);
static GlobalThreadPool & instance();
static void shutdown();
};

View File

@ -52,8 +52,7 @@ void Pool::Entry::decrementRefCount()
Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & config_name,
unsigned default_connections_, unsigned max_connections_,
const char * parent_config_name_)
: logger(Poco::Logger::get("mysqlxx::Pool"))
, default_connections(default_connections_)
: default_connections(default_connections_)
, max_connections(max_connections_)
{
server = cfg.getString(config_name + ".host");
@ -127,6 +126,38 @@ Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & co
}
Pool::Pool(
const std::string & db_,
const std::string & server_,
const std::string & user_,
const std::string & password_,
unsigned port_,
const std::string & socket_,
unsigned connect_timeout_,
unsigned rw_timeout_,
unsigned default_connections_,
unsigned max_connections_,
unsigned enable_local_infile_,
bool opt_reconnect_)
: default_connections(default_connections_)
, max_connections(max_connections_)
, db(db_)
, server(server_)
, user(user_)
, password(password_)
, port(port_)
, socket(socket_)
, connect_timeout(connect_timeout_)
, rw_timeout(rw_timeout_)
, enable_local_infile(enable_local_infile_)
, opt_reconnect(opt_reconnect_)
{
LOG_DEBUG(log,
"Created MySQL Pool with settings: connect_timeout={}, read_write_timeout={}, default_connections_number={}, max_connections_number={}",
connect_timeout, rw_timeout, default_connections, max_connections);
}
Pool::~Pool()
{
std::lock_guard lock(mutex);
@ -148,29 +179,29 @@ Pool::Entry Pool::get(uint64_t wait_timeout)
initialize();
for (;;)
{
logger.trace("(%s): Iterating through existing MySQL connections", getDescription());
LOG_TRACE(log, "{}: Iterating through existing MySQL connections", getDescription());
for (auto & connection : connections)
{
if (connection->ref_count == 0)
{
logger.test("Found free connection in pool, returning it to the caller");
LOG_TEST(log, "Found free connection in pool, returning it to the caller");
return Entry(connection, this);
}
}
logger.trace("(%s): Trying to allocate a new connection.", getDescription());
LOG_TRACE(log, "{}: Trying to allocate a new connection.", getDescription());
if (connections.size() < static_cast<size_t>(max_connections))
{
Connection * conn = allocConnection();
if (conn)
return Entry(conn, this);
logger.trace("(%s): Unable to create a new connection: Allocation failed.", getDescription());
LOG_TRACE(log, "{}: Unable to create a new connection: Allocation failed.", getDescription());
}
else
{
logger.trace("(%s): Unable to create a new connection: Max number of connections has been reached.", getDescription());
LOG_TRACE(log, "{}: Unable to create a new connection: Max number of connections has been reached.", getDescription());
}
if (!wait_timeout)
@ -180,7 +211,7 @@ Pool::Entry Pool::get(uint64_t wait_timeout)
throw Poco::Exception("mysqlxx::Pool is full (connection_wait_timeout is exceeded)");
lock.unlock();
logger.trace("(%s): Sleeping for %d seconds.", getDescription(), MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
LOG_TRACE(log, "{}: Sleeping for {} seconds.", getDescription(), MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
lock.lock();
}
@ -206,7 +237,7 @@ Pool::Entry Pool::tryGet()
return res;
}
logger.debug("(%s): Idle connection to MySQL server cannot be recovered, dropping it.", getDescription());
LOG_DEBUG(log, "{}: Idle connection to MySQL server cannot be recovered, dropping it.", getDescription());
/// This one is disconnected, cannot be reestablished and so needs to be disposed of.
connection_it = connections.erase(connection_it);
@ -229,7 +260,7 @@ Pool::Entry Pool::tryGet()
void Pool::removeConnection(Connection* connection)
{
logger.trace("(%s): Removing connection.", getDescription());
LOG_TRACE(log, "{}: Removing connection.", getDescription());
std::lock_guard lock(mutex);
if (connection)
@ -260,8 +291,8 @@ void Pool::Entry::forceConnected() const
else
sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
pool->logger.debug(
"Creating a new MySQL connection to %s with settings: connect_timeout=%u, read_write_timeout=%u",
LOG_DEBUG(pool->log,
"Creating a new MySQL connection to {} with settings: connect_timeout={}, read_write_timeout={}",
pool->description, pool->connect_timeout, pool->rw_timeout);
data->conn.connect(
@ -287,21 +318,21 @@ bool Pool::Entry::tryForceConnected() const
auto * const mysql_driver = data->conn.getDriver();
const auto prev_connection_id = mysql_thread_id(mysql_driver);
pool->logger.trace("Entry(connection %lu): sending PING to check if it is alive.", prev_connection_id);
LOG_TRACE(pool->log, "Entry(connection {}): sending PING to check if it is alive.", prev_connection_id);
if (data->conn.ping()) /// Attempts to reestablish lost connection
{
const auto current_connection_id = mysql_thread_id(mysql_driver);
if (prev_connection_id != current_connection_id)
{
pool->logger.debug("Entry(connection %lu): Reconnected to MySQL server. Connection id changed: %lu -> %lu",
current_connection_id, prev_connection_id, current_connection_id);
LOG_DEBUG(pool->log, "Entry(connection {}): Reconnected to MySQL server. Connection id changed: {} -> {}",
current_connection_id, prev_connection_id, current_connection_id);
}
pool->logger.trace("Entry(connection %lu): PING ok.", current_connection_id);
LOG_TRACE(pool->log, "Entry(connection {}): PING ok.", current_connection_id);
return true;
}
pool->logger.trace("Entry(connection %lu): PING failed.", prev_connection_id);
LOG_TRACE(pool->log, "Entry(connection {}): PING failed.", prev_connection_id);
return false;
}
@ -326,10 +357,10 @@ Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time)
try
{
logger.debug("Connecting to %s", description);
LOG_DEBUG(log, "Connecting to {}", description);
logger.debug(
"Creating a new MySQL connection to %s with settings: connect_timeout=%u, read_write_timeout=%u",
LOG_DEBUG(log,
"Creating a new MySQL connection to {} with settings: connect_timeout={}, read_write_timeout={}",
description, connect_timeout, rw_timeout);
conn_ptr->conn.connect(
@ -349,7 +380,7 @@ Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time)
}
catch (mysqlxx::ConnectionFailed & e)
{
logger.error(e.what());
LOG_ERROR(log, "Failed to connect to MySQL ({}): {}", description, e.what());
if ((!was_successful && !dont_throw_if_failed_first_time)
|| e.errnum() == ER_ACCESS_DENIED_ERROR

View File

@ -169,28 +169,10 @@ public:
unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS,
unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS,
unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
bool opt_reconnect_ = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT)
: logger(Poco::Logger::get("mysqlxx::Pool"))
, default_connections(default_connections_)
, max_connections(max_connections_)
, db(db_)
, server(server_)
, user(user_)
, password(password_)
, port(port_)
, socket(socket_)
, connect_timeout(connect_timeout_)
, rw_timeout(rw_timeout_)
, enable_local_infile(enable_local_infile_)
, opt_reconnect(opt_reconnect_)
{
logger.debug(
"Created MySQL Pool with settings: connect_timeout=%u, read_write_timeout=%u, default_connections_number=%u, max_connections_number=%u",
connect_timeout, rw_timeout, default_connections, max_connections);
}
bool opt_reconnect_ = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
Pool(const Pool & other)
: logger(other.logger), default_connections{other.default_connections},
: default_connections{other.default_connections},
max_connections{other.max_connections},
db{other.db}, server{other.server},
user{other.user}, password{other.password},
@ -220,7 +202,7 @@ public:
void removeConnection(Connection * connection);
protected:
Poco::Logger & logger;
Poco::Logger * log = &Poco::Logger::get("mysqlxx::Pool");
/// Number of MySQL connections which are created at launch.
unsigned default_connections;

View File

@ -184,7 +184,7 @@ std::vector<std::pair<String, uint16_t>> parseRemoteDescriptionForExternalDataba
}
else
{
result.emplace_back(std::make_pair(address.substr(0, colon), DB::parseFromString<UInt16>(address.substr(colon + 1))));
result.emplace_back(std::make_pair(address.substr(0, colon), parseFromString<UInt16>(address.substr(colon + 1))));
}
}

View File

@ -1,8 +1,12 @@
#pragma once
#include <base/types.h>
#include <vector>
namespace DB
{
/* Parse a string that generates shards and replicas. Separator - one of two characters '|' or ','
* depending on whether shards or replicas are generated.
* For example:

View File

@ -24,6 +24,41 @@ namespace DB
}
namespace
{
class Events
{
public:
Events() : start_time(std::chrono::steady_clock::now()) {}
void add(String && event, std::chrono::milliseconds correction = std::chrono::milliseconds::zero())
{
String timepoint = std::to_string(std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start_time).count());
if (timepoint.length() < 5)
timepoint.insert(0, 5 - timepoint.length(), ' ');
if (correction.count())
std::this_thread::sleep_for(correction);
std::lock_guard lock{mutex};
//std::cout << timepoint << " : " << event << std::endl;
events.emplace_back(std::move(event));
}
void check(const Strings & expected_events)
{
std::lock_guard lock{mutex};
EXPECT_EQ(events.size(), expected_events.size());
for (size_t i = 0; i != events.size(); ++i)
EXPECT_EQ(events[i], (i < expected_events.size() ? expected_events[i] : ""));
}
private:
const std::chrono::time_point<std::chrono::steady_clock> start_time;
Strings events TSA_GUARDED_BY(mutex);
mutable std::mutex mutex;
};
}
TEST(Common, RWLock1)
{
/// Tests with threads require this, because otherwise
@ -287,3 +322,260 @@ TEST(Common, RWLockNotUpgradeableWithNoQuery)
read_thread.join();
}
TEST(Common, RWLockWriteLockTimeoutDuringRead)
{
/// 0 100 200 300 400
/// <---------------------------------------- ra ---------------------------------------------->
/// <----- wc (acquiring lock, failed by timeout) ----->
/// <wd>
///
/// 0 : Locking ra
/// 0 : Locked ra
/// 100 : Locking wc
/// 300 : Failed to lock wc
/// 400 : Unlocking ra
/// 400 : Unlocked ra
/// 400 : Locking wd
/// 400 : Locked wd
/// 400 : Unlocking wd
/// 400 : Unlocked wd
static auto rw_lock = RWLockImpl::create();
Events events;
std::thread ra_thread([&] ()
{
events.add("Locking ra");
auto ra = rw_lock->getLock(RWLockImpl::Read, "ra");
events.add(ra ? "Locked ra" : "Failed to lock ra");
EXPECT_NE(ra, nullptr);
std::this_thread::sleep_for(std::chrono::duration<int, std::milli>(400));
events.add("Unlocking ra");
ra.reset();
events.add("Unlocked ra");
});
std::thread wc_thread([&] ()
{
std::this_thread::sleep_for(std::chrono::duration<int, std::milli>(100));
events.add("Locking wc");
auto wc = rw_lock->getLock(RWLockImpl::Write, "wc", std::chrono::milliseconds(200));
events.add(wc ? "Locked wc" : "Failed to lock wc");
EXPECT_EQ(wc, nullptr);
});
ra_thread.join();
wc_thread.join();
{
events.add("Locking wd");
auto wd = rw_lock->getLock(RWLockImpl::Write, "wd", std::chrono::milliseconds(1000));
events.add(wd ? "Locked wd" : "Failed to lock wd");
EXPECT_NE(wd, nullptr);
events.add("Unlocking wd");
wd.reset();
events.add("Unlocked wd");
}
events.check(
{"Locking ra",
"Locked ra",
"Locking wc",
"Failed to lock wc",
"Unlocking ra",
"Unlocked ra",
"Locking wd",
"Locked wd",
"Unlocking wd",
"Unlocked wd"});
}
TEST(Common, RWLockWriteLockTimeoutDuringTwoReads)
{
/// 0 100 200 300 400 500
/// <---------------------------------------- ra ----------------------------------------------->
/// <------ wc (acquiring lock, failed by timeout) ------->
/// <-- rb (acquiring lock) --><---------- rb (locked) ------------>
/// <wd>
///
/// 0 : Locking ra
/// 0 : Locked ra
/// 100 : Locking wc
/// 200 : Locking rb
/// 300 : Failed to lock wc
/// 300 : Locked rb
/// 400 : Unlocking ra
/// 400 : Unlocked ra
/// 500 : Unlocking rb
/// 500 : Unlocked rb
/// 501 : Locking wd
/// 501 : Locked wd
/// 501 : Unlocking wd
/// 501 : Unlocked wd
static auto rw_lock = RWLockImpl::create();
Events events;
std::thread ra_thread([&] ()
{
events.add("Locking ra");
auto ra = rw_lock->getLock(RWLockImpl::Read, "ra");
events.add(ra ? "Locked ra" : "Failed to lock ra");
EXPECT_NE(ra, nullptr);
std::this_thread::sleep_for(std::chrono::duration<int, std::milli>(400));
events.add("Unlocking ra");
ra.reset();
events.add("Unlocked ra");
});
std::thread rb_thread([&] ()
{
std::this_thread::sleep_for(std::chrono::duration<int, std::milli>(200));
events.add("Locking rb");
auto rb = rw_lock->getLock(RWLockImpl::Read, "rb");
/// `correction` is used here to add an event to `events` a little later.
/// (Because the event "Locked rb" happens at nearly the same time as "Failed to lock wc" and we don't want our test to be flaky.)
auto correction = std::chrono::duration<int, std::milli>(50);
events.add(rb ? "Locked rb" : "Failed to lock rb", correction);
EXPECT_NE(rb, nullptr);
std::this_thread::sleep_for(std::chrono::duration<int, std::milli>(200) - correction);
events.add("Unlocking rb");
rb.reset();
events.add("Unlocked rb");
});
std::thread wc_thread([&] ()
{
std::this_thread::sleep_for(std::chrono::duration<int, std::milli>(100));
events.add("Locking wc");
auto wc = rw_lock->getLock(RWLockImpl::Write, "wc", std::chrono::milliseconds(200));
events.add(wc ? "Locked wc" : "Failed to lock wc");
EXPECT_EQ(wc, nullptr);
});
ra_thread.join();
rb_thread.join();
wc_thread.join();
{
events.add("Locking wd");
auto wd = rw_lock->getLock(RWLockImpl::Write, "wd", std::chrono::milliseconds(1000));
events.add(wd ? "Locked wd" : "Failed to lock wd");
EXPECT_NE(wd, nullptr);
events.add("Unlocking wd");
wd.reset();
events.add("Unlocked wd");
}
events.check(
{"Locking ra",
"Locked ra",
"Locking wc",
"Locking rb",
"Failed to lock wc",
"Locked rb",
"Unlocking ra",
"Unlocked ra",
"Unlocking rb",
"Unlocked rb",
"Locking wd",
"Locked wd",
"Unlocking wd",
"Unlocked wd"});
}
TEST(Common, RWLockWriteLockTimeoutDuringWriteWithWaitingRead)
{
/// 0 100 200 300 400 500
/// <--------------------------------------------------- wa -------------------------------------------------------->
/// <------ wb (acquiring lock, failed by timeout) ------>
/// <-- rc (acquiring lock, failed by timeout) -->
/// <wd>
///
/// 0 : Locking wa
/// 0 : Locked wa
/// 100 : Locking wb
/// 200 : Locking rc
/// 300 : Failed to lock wb
/// 400 : Failed to lock rc
/// 500 : Unlocking wa
/// 500 : Unlocked wa
/// 501 : Locking wd
/// 501 : Locked wd
/// 501 : Unlocking wd
/// 501 : Unlocked wd
static auto rw_lock = RWLockImpl::create();
Events events;
std::thread wa_thread([&] ()
{
events.add("Locking wa");
auto wa = rw_lock->getLock(RWLockImpl::Write, "wa");
events.add(wa ? "Locked wa" : "Failed to lock wa");
EXPECT_NE(wa, nullptr);
std::this_thread::sleep_for(std::chrono::duration<int, std::milli>(500));
events.add("Unlocking wa");
wa.reset();
events.add("Unlocked wa");
});
std::thread wb_thread([&] ()
{
std::this_thread::sleep_for(std::chrono::duration<int, std::milli>(100));
events.add("Locking wb");
auto wb = rw_lock->getLock(RWLockImpl::Write, "wb", std::chrono::milliseconds(200));
events.add(wb ? "Locked wb" : "Failed to lock wb");
EXPECT_EQ(wb, nullptr);
});
std::thread rc_thread([&] ()
{
std::this_thread::sleep_for(std::chrono::duration<int, std::milli>(200));
events.add("Locking rc");
auto rc = rw_lock->getLock(RWLockImpl::Read, "rc", std::chrono::milliseconds(200));
events.add(rc ? "Locked rc" : "Failed to lock rc");
EXPECT_EQ(rc, nullptr);
});
wa_thread.join();
wb_thread.join();
rc_thread.join();
{
events.add("Locking wd");
auto wd = rw_lock->getLock(RWLockImpl::Write, "wd", std::chrono::milliseconds(1000));
events.add(wd ? "Locked wd" : "Failed to lock wd");
EXPECT_NE(wd, nullptr);
events.add("Unlocking wd");
wd.reset();
events.add("Unlocked wd");
}
events.check(
{"Locking wa",
"Locked wa",
"Locking wb",
"Locking rc",
"Failed to lock wb",
"Failed to lock rc",
"Unlocking wa",
"Unlocked wa",
"Locking wd",
"Locked wd",
"Unlocking wd",
"Unlocked wd"});
}

View File

@ -113,6 +113,12 @@ KeeperAsynchronousMetrics::KeeperAsynchronousMetrics(
{
}
KeeperAsynchronousMetrics::~KeeperAsynchronousMetrics()
{
/// NOTE: stop() from base class is not enough, since this leads to leak on vptr
stop();
}
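The destructor above guards against a classic hazard with background threads and virtual calls. A simplified sketch of the pattern follows (toy classes, not the actual AsynchronousMetrics hierarchy): if only the base class stopped the worker thread, the thread could still invoke the virtual update hook after the derived part (and its slot in the vtable) is already gone, so the derived class must stop the thread in its own destructor.

#include <atomic>
#include <chrono>
#include <thread>

struct Base
{
    std::atomic<bool> stopped{false};
    std::thread worker;

    void start()
    {
        worker = std::thread([this]
        {
            while (!stopped)
            {
                update();
                std::this_thread::sleep_for(std::chrono::milliseconds(10));
            }
        });
    }

    void stop()
    {
        stopped = true;
        if (worker.joinable())
            worker.join();
    }

    virtual void update() = 0;
    virtual ~Base() { stop(); }   /// if this were the only stop(), the worker could still hit a destroyed Derived
};

struct Derived : Base
{
    void update() override { /* uses members of Derived */ }
    ~Derived() override { stop(); }   /// like KeeperAsynchronousMetrics above: stop while Derived is still alive
};

int main()
{
    Derived d;
    d.start();
    std::this_thread::sleep_for(std::chrono::milliseconds(50));
}   /// ~Derived joins the worker before any part of Derived is destroyed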
void KeeperAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values, TimePoint /*update_time*/, TimePoint /*current_time*/)
{
#if USE_NURAFT

View File

@ -14,6 +14,7 @@ class KeeperAsynchronousMetrics : public AsynchronousMetrics
public:
KeeperAsynchronousMetrics(
ContextPtr context_, int update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_);
~KeeperAsynchronousMetrics() override;
private:
ContextPtr context;

View File

@ -59,6 +59,8 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config,
}
}
updateKeeperMemorySoftLimit(config);
digest_enabled = config.getBool("keeper_server.digest_enabled", false);
ignore_system_path_on_startup = config.getBool("keeper_server.ignore_system_path_on_startup", false);
@ -375,4 +377,10 @@ void KeeperContext::initializeFeatureFlags(const Poco::Util::AbstractConfigurati
feature_flags.logFlags(&Poco::Logger::get("KeeperContext"));
}
void KeeperContext::updateKeeperMemorySoftLimit(const Poco::Util::AbstractConfiguration & config)
{
if (config.hasProperty("keeper_server.max_memory_usage_soft_limit"))
memory_soft_limit = config.getUInt64("keeper_server.max_memory_usage_soft_limit");
}
}

View File

@ -53,6 +53,9 @@ public:
constexpr KeeperDispatcher * getDispatcher() const { return dispatcher; }
UInt64 getKeeperMemorySoftLimit() const { return memory_soft_limit; }
void updateKeeperMemorySoftLimit(const Poco::Util::AbstractConfiguration & config);
/// set to true when we have preprocessed or committed all the logs
/// that were already present locally during startup
std::atomic<bool> local_logs_preprocessed = false;
@ -92,6 +95,8 @@ private:
KeeperFeatureFlags feature_flags;
KeeperDispatcher * dispatcher{nullptr};
std::atomic<UInt64> memory_soft_limit = 0;
};
using KeeperContextPtr = std::shared_ptr<KeeperContext>;

View File

@ -51,6 +51,56 @@ namespace ErrorCodes
extern const int SYSTEM_ERROR;
}
namespace
{
bool checkIfRequestIncreaseMem(const Coordination::ZooKeeperRequestPtr & request)
{
if (request->getOpNum() == Coordination::OpNum::Create
|| request->getOpNum() == Coordination::OpNum::CreateIfNotExists
|| request->getOpNum() == Coordination::OpNum::Set)
{
return true;
}
else if (request->getOpNum() == Coordination::OpNum::Multi)
{
Coordination::ZooKeeperMultiRequest & multi_req = dynamic_cast<Coordination::ZooKeeperMultiRequest &>(*request);
Int64 memory_delta = 0;
for (const auto & sub_req : multi_req.requests)
{
auto sub_zk_request = std::dynamic_pointer_cast<Coordination::ZooKeeperRequest>(sub_req);
switch (sub_zk_request->getOpNum())
{
case Coordination::OpNum::Create:
case Coordination::OpNum::CreateIfNotExists:
{
Coordination::ZooKeeperCreateRequest & create_req = dynamic_cast<Coordination::ZooKeeperCreateRequest &>(*sub_zk_request);
memory_delta += create_req.bytesSize();
break;
}
case Coordination::OpNum::Set:
{
Coordination::ZooKeeperSetRequest & set_req = dynamic_cast<Coordination::ZooKeeperSetRequest &>(*sub_zk_request);
memory_delta += set_req.bytesSize();
break;
}
case Coordination::OpNum::Remove:
{
Coordination::ZooKeeperRemoveRequest & remove_req = dynamic_cast<Coordination::ZooKeeperRemoveRequest &>(*sub_zk_request);
memory_delta -= remove_req.bytesSize();
break;
}
default:
break;
}
}
return memory_delta > 0;
}
return false;
}
}
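A condensed model of the accounting in checkIfRequestIncreaseMem above, with toy types standing in for the Coordination request classes: Create and Set sub-requests add their serialized size, Remove subtracts it, and a Multi batch counts as memory-increasing only when the net delta is positive, so delete-heavy batches are still accepted once the soft limit is reached.

#include <cstdint>
#include <utility>
#include <vector>

enum class Op { Create, Set, Remove, Other };

/// Toy stand-in for a Multi request: one (operation, serialized size in bytes) pair per sub-request.
static bool multiIncreasesMemory(const std::vector<std::pair<Op, int64_t>> & sub_requests)
{
    int64_t memory_delta = 0;
    for (const auto & [op, bytes] : sub_requests)
    {
        if (op == Op::Create || op == Op::Set)
            memory_delta += bytes;
        else if (op == Op::Remove)
            memory_delta -= bytes;
    }
    return memory_delta > 0;
}

For example, a batch with one Create of 1000 bytes and one Remove of 4000 bytes yields a delta of -3000, so it is not refused.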
KeeperDispatcher::KeeperDispatcher()
: responses_queue(std::numeric_limits<size_t>::max())
@ -93,6 +143,14 @@ void KeeperDispatcher::requestThread()
if (shutdown_called)
break;
Int64 mem_soft_limit = keeper_context->getKeeperMemorySoftLimit();
if (configuration_and_settings->standalone_keeper && mem_soft_limit > 0 && total_memory_tracker.get() >= mem_soft_limit && checkIfRequestIncreaseMem(request.request))
{
LOG_TRACE(log, "Processing requests refused because of max_memory_usage_soft_limit {}, the total used memory is {}, request type is {}", mem_soft_limit, total_memory_tracker.get(), request.request->getOpNum());
addErrorResponses({request}, Coordination::Error::ZCONNECTIONLOSS);
continue;
}
KeeperStorage::RequestsForSessions current_batch;
size_t current_batch_bytes_size = 0;
@ -872,6 +930,8 @@ void KeeperDispatcher::updateConfiguration(const Poco::Util::AbstractConfigurati
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push configuration update to queue");
snapshot_s3.updateS3Configuration(config, macros);
keeper_context->updateKeeperMemorySoftLimit(config);
}
void KeeperDispatcher::updateKeeperStatLatency(uint64_t process_time_ms)

View File

@ -4,6 +4,7 @@
#include <Common/Macros.h>
#include <Common/ThreadPool.h>
#include <Common/callOnce.h>
#include <Disks/IO/IOUringReader.h>
#include <Core/ServerSettings.h>
@ -62,6 +63,11 @@ struct ContextSharedPart : boost::noncopyable
mutable std::unique_ptr<IAsynchronousReader> asynchronous_local_fs_reader;
mutable std::unique_ptr<IAsynchronousReader> synchronous_local_fs_reader;
#if USE_LIBURING
mutable OnceFlag io_uring_reader_initialized;
mutable std::unique_ptr<IOUringReader> io_uring_reader;
#endif
mutable OnceFlag threadpool_writer_initialized;
mutable std::unique_ptr<ThreadPool> threadpool_writer;
@ -225,6 +231,17 @@ IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) co
}
}
#if USE_LIBURING
IOUringReader & Context::getIOURingReader() const
{
callOnce(shared->io_uring_reader_initialized, [&] {
shared->io_uring_reader = std::make_unique<IOUringReader>(512);
});
return *shared->io_uring_reader;
}
#endif
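The getter above follows a lazy, thread-safe initialize-once pattern. Here is a sketch of the same idea with standard-library primitives (OnceFlag/callOnce are assumed to behave like std::once_flag/std::call_once, and Reader is a placeholder for IOUringReader): the expensive object is built on first use only, and concurrent callers all observe the fully constructed instance.

#include <memory>
#include <mutex>

struct Reader { explicit Reader(size_t /*queue_entries*/) {} };   /// placeholder type

struct Shared
{
    std::once_flag reader_initialized;
    std::unique_ptr<Reader> reader;
};

Reader & getReader(Shared & shared)
{
    std::call_once(shared.reader_initialized, [&] { shared.reader = std::make_unique<Reader>(512); });
    return *shared.reader;
}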
std::shared_ptr<FilesystemCacheLog> Context::getFilesystemCacheLog() const
{
return nullptr;

View File

@ -20,6 +20,8 @@
#include <memory>
#include "config.h"
namespace DB
{
@ -28,6 +30,7 @@ class Macros;
class FilesystemCacheLog;
class FilesystemReadPrefetchesLog;
class BlobStorageLog;
class IOUringReader;
/// A small class which owns ContextShared.
/// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete.
@ -127,6 +130,9 @@ public:
ApplicationType getApplicationType() const { return ApplicationType::KEEPER; }
IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const;
#if USE_LIBURING
IOUringReader & getIOURingReader() const;
#endif
std::shared_ptr<AsyncReadCounters> getAsyncReadCounters() const;
ThreadPool & getThreadPoolWriter() const;

View File

@ -20,6 +20,11 @@ namespace ErrorCodes
extern const int UNKNOWN_TYPE;
}
ExternalResultDescription::ExternalResultDescription(const Block & sample_block_)
{
init(sample_block_);
}
void ExternalResultDescription::init(const Block & sample_block_)
{
sample_block = sample_block_;

View File

@ -41,6 +41,9 @@ struct ExternalResultDescription
Block sample_block;
std::vector<std::pair<ValueType, bool /* is_nullable */>> types;
ExternalResultDescription() = default;
explicit ExternalResultDescription(const Block & sample_block_);
void init(const Block & sample_block_);
};

View File

@ -36,7 +36,7 @@ void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_colum
void insertPostgreSQLValue(
IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx)
const std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx)
{
switch (type)
{
@ -125,8 +125,8 @@ void insertPostgreSQLValue(
pqxx::array_parser parser{value};
std::pair<pqxx::array_parser::juncture, std::string> parsed = parser.get_next();
size_t dimension = 0, max_dimension = 0, expected_dimensions = array_info[idx].num_dimensions;
const auto parse_value = array_info[idx].pqxx_parser;
size_t dimension = 0, max_dimension = 0, expected_dimensions = array_info.at(idx).num_dimensions;
const auto parse_value = array_info.at(idx).pqxx_parser;
std::vector<Row> dimensions(expected_dimensions + 1);
while (parsed.first != pqxx::array_parser::juncture::done)
@ -138,7 +138,7 @@ void insertPostgreSQLValue(
dimensions[dimension].emplace_back(parse_value(parsed.second));
else if (parsed.first == pqxx::array_parser::juncture::null_value)
dimensions[dimension].emplace_back(array_info[idx].default_value);
dimensions[dimension].emplace_back(array_info.at(idx).default_value);
else if (parsed.first == pqxx::array_parser::juncture::row_end)
{

View File

@ -23,7 +23,7 @@ struct PostgreSQLArrayInfo
void insertPostgreSQLValue(
IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx);
const std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx);
void preparePostgreSQLArrayInfo(
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, const DataTypePtr data_type);
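The parameter is now a const reference, which is why the matching .cpp hunk above replaces array_info[idx] with array_info.at(idx): operator[] may insert a missing key and is therefore not available on a const map, while at() is a read-only lookup that throws if the key is absent. A toy illustration (types unrelated to the PostgreSQL code):

#include <unordered_map>

static int readOnlyLookup(const std::unordered_map<int, int> & map, int key)
{
    // return map[key];  /// would not compile: operator[] is non-const because it may insert
    return map.at(key);   /// const-safe; throws std::out_of_range if the key is missing
}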

View File

@ -79,6 +79,9 @@ namespace DB
\
M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \
M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \
M(UInt64, max_table_num_to_warn, 5000lu, "If the number of tables is greater than this value, the server will create a warning that will be displayed to the user.", 0) \
M(UInt64, max_database_num_to_warn, 1000lu, "If the number of databases is greater than this value, the server will create a warning that will be displayed to the user.", 0) \
M(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will be displayed to the user.", 0) \
M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \
M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \
\

View File

@ -156,6 +156,7 @@ class IColumn;
M(Bool, allow_suspicious_low_cardinality_types, false, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). Enabling this may increase merge times and memory consumption.", 0) \
M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misusage", 0) \
M(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \
M(Bool, allow_suspicious_ttl_expressions, false, "Reject TTL expressions that don't depend on any of table's columns. It indicates a user error most of the time.", 0) \
M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code.", 0) \
M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \
@ -927,12 +928,12 @@ class IColumn;
M(Bool, input_format_parquet_preserve_order, false, "Avoid reordering rows when reading from Parquet files. Usually makes it much slower.", 0) \
M(Bool, input_format_parquet_filter_push_down, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata.", 0) \
M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \
M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \
M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \
M(Bool, input_format_orc_filter_push_down, true, "When reading ORC files, skip whole stripes or row groups based on the WHERE/PREWHERE expressions, min/max statistics or bloom filter in the ORC metadata.", 0) \
M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \
M(Bool, input_format_parquet_allow_missing_columns, true, "Allow missing columns while reading Parquet input formats", 0) \
M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \
M(Bool, input_format_arrow_allow_missing_columns, false, "Allow missing columns while reading Arrow input formats", 0) \
M(Bool, input_format_arrow_allow_missing_columns, true, "Allow missing columns while reading Arrow input formats", 0) \
M(Char, input_format_hive_text_fields_delimiter, '\x01', "Delimiter between fields in Hive Text File", 0) \
M(Char, input_format_hive_text_collection_items_delimiter, '\x02', "Delimiter between collection(array or map) items in Hive Text File", 0) \
M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \
@ -941,6 +942,7 @@ class IColumn;
M(UInt64, input_format_max_rows_to_read_for_schema_inference, 25000, "The maximum rows of data to read for automatic schema inference", 0) \
M(UInt64, input_format_max_bytes_to_read_for_schema_inference, 32 * 1024 * 1024, "The maximum bytes of data to read for automatic schema inference", 0) \
M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \
M(Bool, input_format_csv_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference in CSV format", 0) \
M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \
M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \
M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
@ -960,6 +962,7 @@ class IColumn;
M(Bool, input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Arrow", 0) \
M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \
M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and the schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be a union of the schemas of all files", 0) \
M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \

View File

@ -7,6 +7,7 @@
#include <boost/algorithm/string.hpp>
#include <map>
namespace DB
{
@ -80,6 +81,10 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."},
{"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
{"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"},
{"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}},
{"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"},
{"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"},
{"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"},

View File

@ -196,9 +196,12 @@ IMPLEMENT_SETTING_ENUM(ExternalCommandStderrReaction, ErrorCodes::BAD_ARGUMENTS,
{"log_last", ExternalCommandStderrReaction::LOG_LAST},
{"throw", ExternalCommandStderrReaction::THROW}})
IMPLEMENT_SETTING_ENUM(DateTimeOverflowBehavior, ErrorCodes::BAD_ARGUMENTS,
{{"throw", FormatSettings::DateTimeOverflowBehavior::Throw},
{"ignore", FormatSettings::DateTimeOverflowBehavior::Ignore},
{"saturate", FormatSettings::DateTimeOverflowBehavior::Saturate}})
IMPLEMENT_SETTING_ENUM(SchemaInferenceMode, ErrorCodes::BAD_ARGUMENTS,
{{"default", SchemaInferenceMode::DEFAULT},
{"union", SchemaInferenceMode::UNION}})
IMPLEMENT_SETTING_ENUM(DateTimeOverflowBehavior, ErrorCodes::BAD_ARGUMENTS,
{{"throw", FormatSettings::DateTimeOverflowBehavior::Throw},
{"ignore", FormatSettings::DateTimeOverflowBehavior::Ignore},
{"saturate", FormatSettings::DateTimeOverflowBehavior::Saturate}})
}

View File

@ -252,6 +252,14 @@ DECLARE_SETTING_ENUM(S3QueueAction)
DECLARE_SETTING_ENUM(ExternalCommandStderrReaction)
enum class SchemaInferenceMode
{
DEFAULT,
UNION,
};
DECLARE_SETTING_ENUM(SchemaInferenceMode)
DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOverflowBehavior, FormatSettings::DateTimeOverflowBehavior)
}

View File

@ -440,6 +440,8 @@ template <typename T> inline bool isFloat(const T & data_type) { return WhichDat
template <typename T> inline bool isNativeNumber(const T & data_type) { return WhichDataType(data_type).isNativeNumber(); }
template <typename T> inline bool isNumber(const T & data_type) { return WhichDataType(data_type).isNumber(); }
template <typename T> inline bool isEnum8(const T & data_type) { return WhichDataType(data_type).isEnum8(); }
template <typename T> inline bool isEnum16(const T & data_type) { return WhichDataType(data_type).isEnum16(); }
template <typename T> inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); }
template <typename T> inline bool isDate(const T & data_type) { return WhichDataType(data_type).isDate(); }

View File

@ -152,6 +152,9 @@ template <int UNROLL_TIMES>
static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnString::Offsets & offsets, ReadBuffer & istr, size_t limit)
{
size_t offset = data.size();
/// Avoiding a resize() call on every iteration improves performance.
data.resize(std::max(data.capacity(), static_cast<size_t>(4096)));
for (size_t i = 0; i < limit; ++i)
{
if (istr.eof())
@ -171,7 +174,8 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt
offset += size + 1;
offsets.push_back(offset);
data.resize(offset);
if (unlikely(offset > data.size()))
data.resize_exact(roundUpToPowerOfTwoOrZero(std::max(offset, data.size() * 2)));
if (size)
{
@ -203,6 +207,8 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt
data[offset - 1] = 0;
}
data.resize(offset);
}
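A simplified model of the growth strategy above, using std::vector instead of ColumnString::Chars and a plain doubling resize in place of resize_exact/roundUpToPowerOfTwoOrZero: the buffer is enlarged only when the running offset overflows it, at least doubling each time, and trimmed to the exact size once after the loop, so the per-row resize cost is amortized away.

#include <algorithm>
#include <cstddef>
#include <string>
#include <vector>

static std::vector<char> concatWithAmortizedGrowth(const std::vector<std::string> & strings)
{
    std::vector<char> data;
    data.resize(std::max<size_t>(data.capacity(), 4096));

    size_t offset = 0;
    for (const auto & s : strings)
    {
        const size_t new_offset = offset + s.size() + 1;   /// +1 for the terminating zero byte
        if (new_offset > data.size())
            data.resize(std::max(new_offset, data.size() * 2));   /// grow rarely, at least 2x

        std::copy(s.begin(), s.end(), data.begin() + offset);
        data[new_offset - 1] = 0;
        offset = new_offset;
    }

    data.resize(offset);   /// shrink to the bytes actually written
    return data;
}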

View File

@ -1,6 +1,10 @@
#include <Databases/DDLLoadingDependencyVisitor.h>
#include <Databases/DDLDependencyVisitor.h>
#include <Dictionaries/getDictionaryConfigurationFromAST.h>
#include "config.h"
#if USE_LIBPQXX
#include <Storages/PostgreSQL/StorageMaterializedPostgreSQL.h>
#endif
#include <Interpreters/Context.h>
#include <Interpreters/misc.h>
#include <Parsers/ASTCreateQuery.h>
@ -131,6 +135,14 @@ void DDLLoadingDependencyVisitor::visit(const ASTStorage & storage, Data & data)
extractTableNameFromArgument(*storage.engine, data, 3);
else if (storage.engine->name == "Dictionary")
extractTableNameFromArgument(*storage.engine, data, 0);
#if USE_LIBPQXX
else if (storage.engine->name == "MaterializedPostgreSQL")
{
const auto * create_query = data.create_query->as<ASTCreateQuery>();
auto nested_table = toString(create_query->uuid) + StorageMaterializedPostgreSQL::NESTED_TABLE_SUFFIX;
data.dependencies.emplace(QualifiedTableName{ .database = create_query->getDatabase(), .table = nested_table });
}
#endif
}

View File

@ -89,15 +89,14 @@ void DatabaseAtomic::drop(ContextPtr)
fs::remove_all(getMetadataPath());
}
void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name, const StoragePtr & table, const String & relative_table_path)
void DatabaseAtomic::attachTableUnlocked(ContextPtr local_context, const String & name, const StoragePtr & table, const String & relative_table_path)
{
assert(relative_table_path != data_path && !relative_table_path.empty());
DetachedTables not_in_use;
std::lock_guard lock(mutex);
not_in_use = cleanupDetachedTables();
auto table_id = table->getStorageID();
assertDetachedTableNotInUse(table_id.uuid);
DatabaseOrdinary::attachTableUnlocked(name, table);
DatabaseOrdinary::attachTableUnlocked(local_context, name, table, relative_table_path);
table_name_to_path.emplace(std::make_pair(name, relative_table_path));
}
@ -325,7 +324,7 @@ void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const Stora
/// It throws if `table_metadata_path` already exists (it's possible if table was detached)
renameNoReplace(table_metadata_tmp_path, table_metadata_path); /// Commit point (a sort of)
attachTableUnlocked(query.getTable(), table); /// Should never throw
DatabaseWithOwnTablesBase::attachTableUnlocked(query_context, query.getTable(), table, /*relative_table_path=*/ {}); /// Should never throw
table_name_to_path.emplace(query.getTable(), table_data_path);
}
catch (...)

View File

@ -38,7 +38,6 @@ public:
void dropTable(ContextPtr context, const String & table_name, bool sync) override;
void dropTableImpl(ContextPtr context, const String & table_name, bool sync);
void attachTable(ContextPtr context, const String & name, const StoragePtr & table, const String & relative_table_path) override;
StoragePtr detachTable(ContextPtr context, const String & name) override;
String getTableDataPath(const String & table_name) const override;
@ -66,6 +65,8 @@ public:
void setDetachedTableNotInUseForce(const UUID & uuid) override;
protected:
void attachTableUnlocked(ContextPtr local_context, const String & name, const StoragePtr & table, const String & relative_table_path) TSA_REQUIRES(mutex) override;
void commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path, const String & statement, ContextPtr query_context) override;
void commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table,
const String & table_metadata_tmp_path, const String & table_metadata_path, ContextPtr query_context) override;

View File

@ -18,6 +18,13 @@
namespace fs = std::filesystem;
namespace CurrentMetrics
{
extern const Metric AttachedTable;
}
namespace DB
{
@ -161,10 +168,9 @@ bool DatabaseLazy::empty() const
return tables_cache.empty();
}
void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_name, const StoragePtr & table, const String &)
void DatabaseLazy::attachTableUnlocked(ContextPtr /* context_ */, const String & table_name, const StoragePtr & table, const String &)
{
LOG_DEBUG(log, "Attach table {}.", backQuote(table_name));
std::lock_guard lock(mutex);
time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
auto [it, inserted] = tables_cache.emplace(std::piecewise_construct,
@ -174,6 +180,7 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n
throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists.", backQuote(database_name), backQuote(table_name));
it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name);
CurrentMetrics::add(CurrentMetrics::AttachedTable, 1);
}
StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name)
@ -189,6 +196,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta
if (it->second.expiration_iterator != cache_expiration_queue.end())
cache_expiration_queue.erase(it->second.expiration_iterator);
tables_cache.erase(it);
CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1);
}
return res;
}

Some files were not shown because too many files have changed in this diff.